From d6ff44d647a9c63911983da9979961fde306b3aa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 14 May 2016 00:59:27 +0200 Subject: [PATCH 01/41] cpufreq: governor: CPUFREQ_GOV_LIMITS never fails None of the cpufreq governors currently in the tree will ever fail an invocation of the ->governor() callback with the event argument equal to CPUFREQ_GOV_LIMITS (unless invoked with incorrect arguments which doesn't matter anyway) and had it ever failed, the result of it wouldn't have been very clean. For this reason, rearrange the code in the core to ignore the return value of cpufreq_governor() when called with event equal to CPUFREQ_GOV_LIMITS. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 36bc11a106aa07..e3e666109e2144 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2054,7 +2054,11 @@ static int cpufreq_start_governor(struct cpufreq_policy *policy) cpufreq_update_current_freq(policy); ret = cpufreq_governor(policy, CPUFREQ_GOV_START); - return ret ? ret : cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + if (ret) + return ret; + + cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + return 0; } int cpufreq_register_governor(struct cpufreq_governor *governor) @@ -2195,7 +2199,8 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, if (new_policy->governor == policy->governor) { pr_debug("cpufreq: governor limits update\n"); - return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + return 0; } pr_debug("governor switch\n"); From 9e8c0a899b2f89b8367d33668f6819892f0a00f2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 14 May 2016 01:00:28 +0200 Subject: [PATCH 02/41] cpufreq: governor: Check transition latecy at init time only It is not necessary to check the governor's max_transition_latency attribute every time cpufreq_governor() runs, so check it only if the event argument is CPUFREQ_GOV_POLICY_INIT. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e3e666109e2144..a5f1d016d47341 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2011,23 +2011,24 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) if (!policy->governor) return -EINVAL; - if (policy->governor->max_transition_latency && - policy->cpuinfo.transition_latency > - policy->governor->max_transition_latency) { - struct cpufreq_governor *gov = cpufreq_fallback_governor(); - - if (gov) { - pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n", - policy->governor->name, gov->name); - policy->governor = gov; - } else { - return -EINVAL; + if (event == CPUFREQ_GOV_POLICY_INIT) { + if (policy->governor->max_transition_latency && + policy->cpuinfo.transition_latency > + policy->governor->max_transition_latency) { + struct cpufreq_governor *gov = cpufreq_fallback_governor(); + + if (gov) { + pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n", + policy->governor->name, gov->name); + policy->governor = gov; + } else { + return -EINVAL; + } } - } - if (event == CPUFREQ_GOV_POLICY_INIT) if (!try_module_get(policy->governor->owner)) return -EINVAL; + } pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); From 16de72b9f4d07e393bea7b1221898b3be718a296 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 14 May 2016 01:01:08 +0200 Subject: [PATCH 03/41] cpufreq: governor: Simplify performance and powersave governors The performance and powersave cpufreq governors handle the CPUFREQ_GOV_START event in the same way as CPUFREQ_GOV_LIMITS. However, the cpufreq core always invokes cpufreq_governor() with the event argument equal to CPUFREQ_GOV_LIMITS right after invoking it with event equal to CPUFREQ_GOV_START. As a result, for both the governors in question, __cpufreq_driver_target() is executed twice in a row with the same arguments which is not useful. For this reason, simplify the performance and powersave governors to handle the CPUFREQ_GOV_LIMITS event only as that's going to be sufficient for the governor start too. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_performance.c | 4 +--- drivers/cpufreq/cpufreq_powersave.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index af9f4b96f5a8e1..dd4dca3ab84491 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -20,10 +20,8 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy, unsigned int event) { switch (event) { - case CPUFREQ_GOV_START: case CPUFREQ_GOV_LIMITS: - pr_debug("setting to %u kHz because of event %u\n", - policy->max, event); + pr_debug("setting to %u kHz\n", policy->max); __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); break; diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index b8b400232a7451..691e573e46eeb4 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -20,10 +20,8 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy, unsigned int event) { switch (event) { - case CPUFREQ_GOV_START: case CPUFREQ_GOV_LIMITS: - pr_debug("setting to %u kHz because of event %u\n", - policy->min, event); + pr_debug("setting to %u kHz\n", policy->min); __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); break; From a92604b419f47e1c5098632742d8e031f6e8fab1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 14 May 2016 01:01:46 +0200 Subject: [PATCH 04/41] cpufreq: Split cpufreq_governor() into simpler functions The cpufreq_governor() routine is used by the cpufreq core to invoke the current governor's ->governor() callback with appropriate arguments and do some housekeeping related to that. Unfortunately, the way it mixes different governor events in one code path makes it rather hard to follow the code. For this reason, split cpufreq_governor() into five simpler functions that each will handle just one specific governor event and put all of the code related to the given event into its own function. This change is a prerequisite for a redesign of the cpufreq governor API that will be done subsequently. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 113 +++++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 43 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a5f1d016d47341..268566e4068440 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -74,19 +74,12 @@ static inline bool has_target(void) } /* internal prototypes */ -static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); static unsigned int __cpufreq_get(struct cpufreq_policy *policy); +static int cpufreq_init_governor(struct cpufreq_policy *policy); +static void cpufreq_exit_governor(struct cpufreq_policy *policy); static int cpufreq_start_governor(struct cpufreq_policy *policy); - -static inline void cpufreq_exit_governor(struct cpufreq_policy *policy) -{ - (void)cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); -} - -static inline void cpufreq_stop_governor(struct cpufreq_policy *policy) -{ - (void)cpufreq_governor(policy, CPUFREQ_GOV_STOP); -} +static void cpufreq_stop_governor(struct cpufreq_policy *policy); +static void cpufreq_governor_limits(struct cpufreq_policy *policy); /** * Two notifier lists: the "policy" list is involved in the @@ -1997,7 +1990,7 @@ __weak struct cpufreq_governor *cpufreq_fallback_governor(void) return NULL; } -static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) +static int cpufreq_init_governor(struct cpufreq_policy *policy) { int ret; @@ -2011,57 +2004,91 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) if (!policy->governor) return -EINVAL; - if (event == CPUFREQ_GOV_POLICY_INIT) { - if (policy->governor->max_transition_latency && - policy->cpuinfo.transition_latency > - policy->governor->max_transition_latency) { - struct cpufreq_governor *gov = cpufreq_fallback_governor(); - - if (gov) { - pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n", - policy->governor->name, gov->name); - policy->governor = gov; - } else { - return -EINVAL; - } - } + if (policy->governor->max_transition_latency && + policy->cpuinfo.transition_latency > + policy->governor->max_transition_latency) { + struct cpufreq_governor *gov = cpufreq_fallback_governor(); - if (!try_module_get(policy->governor->owner)) + if (gov) { + pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n", + policy->governor->name, gov->name); + policy->governor = gov; + } else { return -EINVAL; + } } - pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); + if (!try_module_get(policy->governor->owner)) + return -EINVAL; - ret = policy->governor->governor(policy, event); + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); - if (event == CPUFREQ_GOV_POLICY_INIT) { - if (ret) - module_put(policy->governor->owner); - else - policy->governor->initialized++; - } else if (event == CPUFREQ_GOV_POLICY_EXIT) { - policy->governor->initialized--; + ret = policy->governor->governor(policy, CPUFREQ_GOV_POLICY_INIT); + if (ret) { module_put(policy->governor->owner); + return ret; } - return ret; + policy->governor->initialized++; + return 0; +} + +static void cpufreq_exit_governor(struct cpufreq_policy *policy) +{ + if (cpufreq_suspended || !policy->governor) + return; + + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); + + policy->governor->governor(policy, CPUFREQ_GOV_POLICY_EXIT); + + policy->governor->initialized--; + module_put(policy->governor->owner); } static int cpufreq_start_governor(struct cpufreq_policy *policy) { int ret; + if (cpufreq_suspended) + return 0; + + if (!policy->governor) + return -EINVAL; + + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); + if (cpufreq_driver->get && !cpufreq_driver->setpolicy) cpufreq_update_current_freq(policy); - ret = cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = policy->governor->governor(policy, CPUFREQ_GOV_START); if (ret) return ret; - cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + policy->governor->governor(policy, CPUFREQ_GOV_LIMITS); return 0; } +static void cpufreq_stop_governor(struct cpufreq_policy *policy) +{ + if (cpufreq_suspended || !policy->governor) + return; + + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); + + policy->governor->governor(policy, CPUFREQ_GOV_STOP); +} + +static void cpufreq_governor_limits(struct cpufreq_policy *policy) +{ + if (cpufreq_suspended || !policy->governor) + return; + + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); + + policy->governor->governor(policy, CPUFREQ_GOV_LIMITS); +} + int cpufreq_register_governor(struct cpufreq_governor *governor) { int err; @@ -2200,7 +2227,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, if (new_policy->governor == policy->governor) { pr_debug("cpufreq: governor limits update\n"); - cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + cpufreq_governor_limits(policy); return 0; } @@ -2216,7 +2243,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, /* start new governor */ policy->governor = new_policy->governor; - ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); + ret = cpufreq_init_governor(policy); if (!ret) { ret = cpufreq_start_governor(policy); if (!ret) { @@ -2230,7 +2257,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, pr_debug("starting governor %s failed\n", policy->governor->name); if (old_gov) { policy->governor = old_gov; - if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) + if (cpufreq_init_governor(policy)) policy->governor = NULL; else cpufreq_start_governor(policy); @@ -2328,7 +2355,7 @@ static int cpufreq_boost_set_sw(int state) down_write(&policy->rwsem); policy->user_policy.max = policy->max; - cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + cpufreq_governor_limits(policy); up_write(&policy->rwsem); } } From e788892ba3cc71d385b75895f7a375fbc659ce86 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Jun 2016 23:24:15 +0200 Subject: [PATCH 05/41] cpufreq: governor: Get rid of governor events The design of the cpufreq governor API is not very straightforward, as struct cpufreq_governor provides only one callback to be invoked from different code paths for different purposes. The purpose it is invoked for is determined by its second "event" argument, causing it to act as a "callback multiplexer" of sorts. Unfortunately, that leads to extra complexity in governors, some of which implement the ->governor() callback as a switch statement that simply checks the event argument and invokes a separate function to handle that specific event. That extra complexity can be eliminated by replacing the all-purpose ->governor() callback with a family of callbacks to carry out specific governor operations: initialization and exit, start and stop and policy limits updates. That also turns out to reduce the code size too, so do it. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- .../platforms/cell/cpufreq_spudemand.c | 72 ++++++------ drivers/cpufreq/cpufreq.c | 31 ++++-- drivers/cpufreq/cpufreq_conservative.c | 7 +- drivers/cpufreq/cpufreq_governor.c | 39 ++----- drivers/cpufreq/cpufreq_governor.h | 20 +++- drivers/cpufreq/cpufreq_ondemand.c | 7 +- drivers/cpufreq/cpufreq_performance.c | 17 +-- drivers/cpufreq/cpufreq_powersave.c | 17 +-- drivers/cpufreq/cpufreq_userspace.c | 104 +++++++++--------- include/linux/cpufreq.h | 14 +-- kernel/sched/cpufreq_schedutil.c | 34 ++---- 11 files changed, 158 insertions(+), 204 deletions(-) diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c index 82607d621aca41..88301e53f0856b 100644 --- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -85,61 +85,57 @@ static void spu_gov_cancel_work(struct spu_gov_info_struct *info) cancel_delayed_work_sync(&info->work); } -static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event) +static int spu_gov_start(struct cpufreq_policy *policy) { unsigned int cpu = policy->cpu; - struct spu_gov_info_struct *info, *affected_info; + struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu); + struct spu_gov_info_struct *affected_info; int i; - int ret = 0; - info = &per_cpu(spu_gov_info, cpu); - - switch (event) { - case CPUFREQ_GOV_START: - if (!cpu_online(cpu)) { - printk(KERN_ERR "cpu %d is not online\n", cpu); - ret = -EINVAL; - break; - } + if (!cpu_online(cpu)) { + printk(KERN_ERR "cpu %d is not online\n", cpu); + return -EINVAL; + } - if (!policy->cur) { - printk(KERN_ERR "no cpu specified in policy\n"); - ret = -EINVAL; - break; - } + if (!policy->cur) { + printk(KERN_ERR "no cpu specified in policy\n"); + return -EINVAL; + } - /* initialize spu_gov_info for all affected cpus */ - for_each_cpu(i, policy->cpus) { - affected_info = &per_cpu(spu_gov_info, i); - affected_info->policy = policy; - } + /* initialize spu_gov_info for all affected cpus */ + for_each_cpu(i, policy->cpus) { + affected_info = &per_cpu(spu_gov_info, i); + affected_info->policy = policy; + } - info->poll_int = POLL_TIME; + info->poll_int = POLL_TIME; - /* setup timer */ - spu_gov_init_work(info); + /* setup timer */ + spu_gov_init_work(info); - break; + return 0; +} - case CPUFREQ_GOV_STOP: - /* cancel timer */ - spu_gov_cancel_work(info); +static void spu_gov_stop(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu); + int i; - /* clean spu_gov_info for all affected cpus */ - for_each_cpu (i, policy->cpus) { - info = &per_cpu(spu_gov_info, i); - info->policy = NULL; - } + /* cancel timer */ + spu_gov_cancel_work(info); - break; + /* clean spu_gov_info for all affected cpus */ + for_each_cpu (i, policy->cpus) { + info = &per_cpu(spu_gov_info, i); + info->policy = NULL; } - - return ret; } static struct cpufreq_governor spu_governor = { .name = "spudemand", - .governor = spu_gov_govern, + .start = spu_gov_start, + .stop = spu_gov_stop, .owner = THIS_MODULE, }; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 268566e4068440..d98ff688b2f110 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2023,10 +2023,12 @@ static int cpufreq_init_governor(struct cpufreq_policy *policy) pr_debug("%s: for CPU %u\n", __func__, policy->cpu); - ret = policy->governor->governor(policy, CPUFREQ_GOV_POLICY_INIT); - if (ret) { - module_put(policy->governor->owner); - return ret; + if (policy->governor->init) { + ret = policy->governor->init(policy); + if (ret) { + module_put(policy->governor->owner); + return ret; + } } policy->governor->initialized++; @@ -2040,7 +2042,8 @@ static void cpufreq_exit_governor(struct cpufreq_policy *policy) pr_debug("%s: for CPU %u\n", __func__, policy->cpu); - policy->governor->governor(policy, CPUFREQ_GOV_POLICY_EXIT); + if (policy->governor->exit) + policy->governor->exit(policy); policy->governor->initialized--; module_put(policy->governor->owner); @@ -2061,11 +2064,15 @@ static int cpufreq_start_governor(struct cpufreq_policy *policy) if (cpufreq_driver->get && !cpufreq_driver->setpolicy) cpufreq_update_current_freq(policy); - ret = policy->governor->governor(policy, CPUFREQ_GOV_START); - if (ret) - return ret; + if (policy->governor->start) { + ret = policy->governor->start(policy); + if (ret) + return ret; + } + + if (policy->governor->limits) + policy->governor->limits(policy); - policy->governor->governor(policy, CPUFREQ_GOV_LIMITS); return 0; } @@ -2076,7 +2083,8 @@ static void cpufreq_stop_governor(struct cpufreq_policy *policy) pr_debug("%s: for CPU %u\n", __func__, policy->cpu); - policy->governor->governor(policy, CPUFREQ_GOV_STOP); + if (policy->governor->stop) + policy->governor->stop(policy); } static void cpufreq_governor_limits(struct cpufreq_policy *policy) @@ -2086,7 +2094,8 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy) pr_debug("%s: for CPU %u\n", __func__, policy->cpu); - policy->governor->governor(policy, CPUFREQ_GOV_LIMITS); + if (policy->governor->limits) + policy->governor->limits(policy); } int cpufreq_register_governor(struct cpufreq_governor *governor) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 316df247e00da3..2568508fca3878 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -313,12 +313,7 @@ static void cs_start(struct cpufreq_policy *policy) } static struct dbs_governor cs_dbs_gov = { - .gov = { - .name = "conservative", - .governor = cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, - }, + .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), .kobj_type = { .default_attrs = cs_attributes }, .gov_dbs_timer = cs_dbs_timer, .alloc = cs_alloc, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index be498d56dd6971..ca9927c15a71a4 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -389,7 +389,7 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, gov->free(policy_dbs); } -static int cpufreq_governor_init(struct cpufreq_policy *policy) +int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct dbs_data *dbs_data; @@ -474,8 +474,9 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) mutex_unlock(&gov_dbs_data_mutex); return ret; } +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_init); -static int cpufreq_governor_exit(struct cpufreq_policy *policy) +void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; @@ -500,10 +501,10 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) free_policy_dbs_info(policy_dbs, gov); mutex_unlock(&gov_dbs_data_mutex); - return 0; } +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_exit); -static int cpufreq_governor_start(struct cpufreq_policy *policy) +int cpufreq_dbs_governor_start(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; @@ -539,14 +540,15 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) gov_set_update_util(policy_dbs, sampling_rate); return 0; } +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_start); -static int cpufreq_governor_stop(struct cpufreq_policy *policy) +void cpufreq_dbs_governor_stop(struct cpufreq_policy *policy) { gov_cancel_work(policy); - return 0; } +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_stop); -static int cpufreq_governor_limits(struct cpufreq_policy *policy) +void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; @@ -560,26 +562,5 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) gov_update_sample_delay(policy_dbs, 0); mutex_unlock(&policy_dbs->timer_mutex); - - return 0; -} - -int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) -{ - if (event == CPUFREQ_GOV_POLICY_INIT) { - return cpufreq_governor_init(policy); - } else if (policy->governor_data) { - switch (event) { - case CPUFREQ_GOV_POLICY_EXIT: - return cpufreq_governor_exit(policy); - case CPUFREQ_GOV_START: - return cpufreq_governor_start(policy); - case CPUFREQ_GOV_STOP: - return cpufreq_governor_stop(policy); - case CPUFREQ_GOV_LIMITS: - return cpufreq_governor_limits(policy); - } - } - return -EINVAL; } -EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_limits); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 34eb214b6d57a4..36f0d19dd8691e 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -148,6 +148,25 @@ static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy return container_of(policy->governor, struct dbs_governor, gov); } +/* Governor callback routines */ +int cpufreq_dbs_governor_init(struct cpufreq_policy *policy); +void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy); +int cpufreq_dbs_governor_start(struct cpufreq_policy *policy); +void cpufreq_dbs_governor_stop(struct cpufreq_policy *policy); +void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy); + +#define CPUFREQ_DBS_GOVERNOR_INITIALIZER(_name_) \ + { \ + .name = _name_, \ + .max_transition_latency = TRANSITION_LATENCY_LIMIT, \ + .owner = THIS_MODULE, \ + .init = cpufreq_dbs_governor_init, \ + .exit = cpufreq_dbs_governor_exit, \ + .start = cpufreq_dbs_governor_start, \ + .stop = cpufreq_dbs_governor_stop, \ + .limits = cpufreq_dbs_governor_limits, \ + } + /* Governor specific operations */ struct od_ops { unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, @@ -155,7 +174,6 @@ struct od_ops { }; unsigned int dbs_update(struct cpufreq_policy *policy); -int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 30016343051676..09d6c0c405e4f4 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -420,12 +420,7 @@ static struct od_ops od_ops = { }; static struct dbs_governor od_dbs_gov = { - .gov = { - .name = "ondemand", - .governor = cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, - }, + .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"), .kobj_type = { .default_attrs = od_attributes }, .gov_dbs_timer = od_dbs_timer, .alloc = od_alloc, diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index dd4dca3ab84491..dafb679adc589e 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -16,25 +16,16 @@ #include #include -static int cpufreq_governor_performance(struct cpufreq_policy *policy, - unsigned int event) +static void cpufreq_gov_performance_limits(struct cpufreq_policy *policy) { - switch (event) { - case CPUFREQ_GOV_LIMITS: - pr_debug("setting to %u kHz\n", policy->max); - __cpufreq_driver_target(policy, policy->max, - CPUFREQ_RELATION_H); - break; - default: - break; - } - return 0; + pr_debug("setting to %u kHz\n", policy->max); + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); } static struct cpufreq_governor cpufreq_gov_performance = { .name = "performance", - .governor = cpufreq_governor_performance, .owner = THIS_MODULE, + .limits = cpufreq_gov_performance_limits, }; static int __init cpufreq_gov_performance_init(void) diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index 691e573e46eeb4..78a651038faf2d 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -16,24 +16,15 @@ #include #include -static int cpufreq_governor_powersave(struct cpufreq_policy *policy, - unsigned int event) +static void cpufreq_gov_powersave_limits(struct cpufreq_policy *policy) { - switch (event) { - case CPUFREQ_GOV_LIMITS: - pr_debug("setting to %u kHz\n", policy->min); - __cpufreq_driver_target(policy, policy->min, - CPUFREQ_RELATION_L); - break; - default: - break; - } - return 0; + pr_debug("setting to %u kHz\n", policy->min); + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); } static struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", - .governor = cpufreq_governor_powersave, + .limits = cpufreq_gov_powersave_limits, .owner = THIS_MODULE, }; diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 9f3dec9a3f36db..bd897e3e134de0 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -65,66 +65,66 @@ static int cpufreq_userspace_policy_init(struct cpufreq_policy *policy) return 0; } -static int cpufreq_governor_userspace(struct cpufreq_policy *policy, - unsigned int event) +static void cpufreq_userspace_policy_exit(struct cpufreq_policy *policy) +{ + mutex_lock(&userspace_mutex); + kfree(policy->governor_data); + policy->governor_data = NULL; + mutex_unlock(&userspace_mutex); +} + +static int cpufreq_userspace_policy_start(struct cpufreq_policy *policy) { unsigned int *setspeed = policy->governor_data; - unsigned int cpu = policy->cpu; - int rc = 0; - if (event == CPUFREQ_GOV_POLICY_INIT) - return cpufreq_userspace_policy_init(policy); + BUG_ON(!policy->cur); + pr_debug("started managing cpu %u\n", policy->cpu); - if (!setspeed) - return -EINVAL; - - switch (event) { - case CPUFREQ_GOV_POLICY_EXIT: - mutex_lock(&userspace_mutex); - policy->governor_data = NULL; - kfree(setspeed); - mutex_unlock(&userspace_mutex); - break; - case CPUFREQ_GOV_START: - BUG_ON(!policy->cur); - pr_debug("started managing cpu %u\n", cpu); - - mutex_lock(&userspace_mutex); - per_cpu(cpu_is_managed, cpu) = 1; - *setspeed = policy->cur; - mutex_unlock(&userspace_mutex); - break; - case CPUFREQ_GOV_STOP: - pr_debug("managing cpu %u stopped\n", cpu); - - mutex_lock(&userspace_mutex); - per_cpu(cpu_is_managed, cpu) = 0; - *setspeed = 0; - mutex_unlock(&userspace_mutex); - break; - case CPUFREQ_GOV_LIMITS: - mutex_lock(&userspace_mutex); - pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n", - cpu, policy->min, policy->max, policy->cur, *setspeed); - - if (policy->max < *setspeed) - __cpufreq_driver_target(policy, policy->max, - CPUFREQ_RELATION_H); - else if (policy->min > *setspeed) - __cpufreq_driver_target(policy, policy->min, - CPUFREQ_RELATION_L); - else - __cpufreq_driver_target(policy, *setspeed, - CPUFREQ_RELATION_L); - mutex_unlock(&userspace_mutex); - break; - } - return rc; + mutex_lock(&userspace_mutex); + per_cpu(cpu_is_managed, policy->cpu) = 1; + *setspeed = policy->cur; + mutex_unlock(&userspace_mutex); + return 0; +} + +static void cpufreq_userspace_policy_stop(struct cpufreq_policy *policy) +{ + unsigned int *setspeed = policy->governor_data; + + pr_debug("managing cpu %u stopped\n", policy->cpu); + + mutex_lock(&userspace_mutex); + per_cpu(cpu_is_managed, policy->cpu) = 0; + *setspeed = 0; + mutex_unlock(&userspace_mutex); +} + +static void cpufreq_userspace_policy_limits(struct cpufreq_policy *policy) +{ + unsigned int *setspeed = policy->governor_data; + + mutex_lock(&userspace_mutex); + + pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n", + policy->cpu, policy->min, policy->max, policy->cur, *setspeed); + + if (policy->max < *setspeed) + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); + else if (policy->min > *setspeed) + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); + else + __cpufreq_driver_target(policy, *setspeed, CPUFREQ_RELATION_L); + + mutex_unlock(&userspace_mutex); } static struct cpufreq_governor cpufreq_gov_userspace = { .name = "userspace", - .governor = cpufreq_governor_userspace, + .init = cpufreq_userspace_policy_init, + .exit = cpufreq_userspace_policy_exit, + .start = cpufreq_userspace_policy_start, + .stop = cpufreq_userspace_policy_stop, + .limits = cpufreq_userspace_policy_limits, .store_setspeed = cpufreq_set, .show_setspeed = show_speed, .owner = THIS_MODULE, diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 4e81e08db7522f..5bcda3e6aa9efe 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -455,18 +455,14 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, #define MIN_LATENCY_MULTIPLIER (20) #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) -/* Governor Events */ -#define CPUFREQ_GOV_START 1 -#define CPUFREQ_GOV_STOP 2 -#define CPUFREQ_GOV_LIMITS 3 -#define CPUFREQ_GOV_POLICY_INIT 4 -#define CPUFREQ_GOV_POLICY_EXIT 5 - struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; int initialized; - int (*governor) (struct cpufreq_policy *policy, - unsigned int event); + int (*init)(struct cpufreq_policy *policy); + void (*exit)(struct cpufreq_policy *policy); + int (*start)(struct cpufreq_policy *policy); + void (*stop)(struct cpufreq_policy *policy); + void (*limits)(struct cpufreq_policy *policy); ssize_t (*show_setspeed) (struct cpufreq_policy *policy, char *buf); int (*store_setspeed) (struct cpufreq_policy *policy, diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 14c4aa25cc45d4..fdcee3cf38fcc6 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -394,7 +394,7 @@ static int sugov_init(struct cpufreq_policy *policy) return ret; } -static int sugov_exit(struct cpufreq_policy *policy) +static void sugov_exit(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; struct sugov_tunables *tunables = sg_policy->tunables; @@ -412,7 +412,6 @@ static int sugov_exit(struct cpufreq_policy *policy) mutex_unlock(&global_tunables_lock); sugov_policy_free(sg_policy); - return 0; } static int sugov_start(struct cpufreq_policy *policy) @@ -444,7 +443,7 @@ static int sugov_start(struct cpufreq_policy *policy) return 0; } -static int sugov_stop(struct cpufreq_policy *policy) +static void sugov_stop(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; unsigned int cpu; @@ -456,10 +455,9 @@ static int sugov_stop(struct cpufreq_policy *policy) irq_work_sync(&sg_policy->irq_work); cancel_work_sync(&sg_policy->work); - return 0; } -static int sugov_limits(struct cpufreq_policy *policy) +static void sugov_limits(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; @@ -477,32 +475,16 @@ static int sugov_limits(struct cpufreq_policy *policy) } sg_policy->need_freq_update = true; - return 0; -} - -int sugov_governor(struct cpufreq_policy *policy, unsigned int event) -{ - if (event == CPUFREQ_GOV_POLICY_INIT) { - return sugov_init(policy); - } else if (policy->governor_data) { - switch (event) { - case CPUFREQ_GOV_POLICY_EXIT: - return sugov_exit(policy); - case CPUFREQ_GOV_START: - return sugov_start(policy); - case CPUFREQ_GOV_STOP: - return sugov_stop(policy); - case CPUFREQ_GOV_LIMITS: - return sugov_limits(policy); - } - } - return -EINVAL; } static struct cpufreq_governor schedutil_gov = { .name = "schedutil", - .governor = sugov_governor, .owner = THIS_MODULE, + .init = sugov_init, + .exit = sugov_exit, + .start = sugov_start, + .stop = sugov_stop, + .limits = sugov_limits, }; static int __init sugov_module_init(void) From a69d6b2914865965acc6df95d1aac7ff8ca35094 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 18 May 2016 17:55:26 +0530 Subject: [PATCH 06/41] cpufreq: governor: Remove prints from allocation failures These aren't required anymore as the allocation core already prints such messages. Remove the redundant ones. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 4 +--- drivers/cpufreq/cpufreq_ondemand.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 2568508fca3878..78faa9fbc384f8 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -273,10 +273,8 @@ static int cs_init(struct dbs_data *dbs_data, bool notify) struct cs_dbs_tuners *tuners; tuners = kzalloc(sizeof(*tuners), GFP_KERNEL); - if (!tuners) { - pr_err("%s: kzalloc failed\n", __func__); + if (!tuners) return -ENOMEM; - } tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; tuners->freq_step = DEF_FREQUENCY_STEP; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 09d6c0c405e4f4..4441a11139f1a6 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -368,10 +368,8 @@ static int od_init(struct dbs_data *dbs_data, bool notify) int cpu; tuners = kzalloc(sizeof(*tuners), GFP_KERNEL); - if (!tuners) { - pr_err("%s: kzalloc failed\n", __func__); + if (!tuners) return -ENOMEM; - } cpu = get_cpu(); idle_time = get_cpu_idle_time_us(cpu, NULL); From 666f4ccc5d0f713500f5235eb3f12c1ea8d2278a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 18 May 2016 17:55:27 +0530 Subject: [PATCH 07/41] cpufreq: governor: Remove unnecessary bits from print message pr_*() helpers already prefix the print messages with "cpufreq_governor:" and similar details aren't required in the actual message. For example, the print message getting fixed looks like this before this patch: cpufreq_governor: cpufreq: Governor initialization failed (dbs_data kobject init error 0) Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index ca9927c15a71a4..18eab7f4ba4c69 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -458,7 +458,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) goto out; /* Failure, so roll back. */ - pr_err("cpufreq: Governor initialization failed (dbs_data kobject init error %d)\n", ret); + pr_err("initialization failed (dbs_data kobject init error %d)\n", ret); policy->governor_data = NULL; From bf2be2de8493dd5f86d6e0f0d4eecb5810ad035b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 18 May 2016 17:55:31 +0530 Subject: [PATCH 08/41] cpufreq: governor: Create cpufreq_policy_apply_limits() Create a new helper to avoid code duplication across governors. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 7 +------ include/linux/cpufreq.h | 8 ++++++++ kernel/sched/cpufreq_schedutil.c | 9 +-------- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 18eab7f4ba4c69..4b88b5bec4ef6f 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -553,12 +553,7 @@ void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; mutex_lock(&policy_dbs->timer_mutex); - - if (policy->max < policy->cur) - __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); - else if (policy->min > policy->cur) - __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); - + cpufreq_policy_apply_limits(policy); gov_update_sample_delay(policy_dbs, 0); mutex_unlock(&policy_dbs->timer_mutex); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 5bcda3e6aa9efe..3be54a0b43732c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -489,6 +489,14 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor); struct cpufreq_governor *cpufreq_default_governor(void); struct cpufreq_governor *cpufreq_fallback_governor(void); +static inline void cpufreq_policy_apply_limits(struct cpufreq_policy *policy) +{ + if (policy->max < policy->cur) + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); + else if (policy->min > policy->cur) + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); +} + /* Governor attribute set */ struct gov_attr_set { struct kobject kobj; diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index fdcee3cf38fcc6..758efd7f3abe9d 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -463,14 +463,7 @@ static void sugov_limits(struct cpufreq_policy *policy) if (!policy->fast_switch_enabled) { mutex_lock(&sg_policy->work_lock); - - if (policy->max < policy->cur) - __cpufreq_driver_target(policy, policy->max, - CPUFREQ_RELATION_H); - else if (policy->min > policy->cur) - __cpufreq_driver_target(policy, policy->min, - CPUFREQ_RELATION_L); - + cpufreq_policy_apply_limits(policy); mutex_unlock(&sg_policy->work_lock); } From 9a15fb2c797a15524e63eacb10bd6cd68a99e830 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 18 May 2016 22:59:49 +0200 Subject: [PATCH 09/41] cpufreq: Drop the 'initialized' field from struct cpufreq_governor The 'initialized' field in struct cpufreq_governor is only used by the conservative governor (as a usage counter) and the way that happens is far from straightforward and arguably incorrect. Namely, the value of 'initialized' is checked by cpufreq_dbs_governor_init() and cpufreq_dbs_governor_exit() and the results of those checks are passed (as the second argument) to the ->init() and ->exit() callbacks in struct dbs_governor. Those callbacks are only implemented by the ondemand and conservative governors and ondemand doesn't use their second argument at all. In turn, the conservative governor uses it to decide whether or not to either register or unregister a transition notifier. That whole mechanism is not only unnecessarily convoluted, but also racy, because the 'initialized' field of struct cpufreq_governor is updated in cpufreq_init_governor() and cpufreq_exit_governor() under policy->rwsem which doesn't help if one of these functions is run twice in parallel for different policies (which isn't impossible in principle), for example. Instead of it, add a proper usage counter to the conservative governor and update it from cs_init() and cs_exit() which is guaranteed to be non-racy, as those functions are only called under gov_dbs_data_mutex which is global. With that in place, drop the 'initialized' field from struct cpufreq_governor as it is not used any more. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 3 -- drivers/cpufreq/cpufreq_conservative.c | 44 +++++++++++++++++--------- drivers/cpufreq/cpufreq_governor.c | 6 ++-- drivers/cpufreq/cpufreq_governor.h | 4 +-- drivers/cpufreq/cpufreq_ondemand.c | 4 +-- include/linux/cpufreq.h | 1 - 6 files changed, 36 insertions(+), 26 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d98ff688b2f110..bdf1f280ae3950 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2031,7 +2031,6 @@ static int cpufreq_init_governor(struct cpufreq_policy *policy) } } - policy->governor->initialized++; return 0; } @@ -2045,7 +2044,6 @@ static void cpufreq_exit_governor(struct cpufreq_policy *policy) if (policy->governor->exit) policy->governor->exit(policy); - policy->governor->initialized--; module_put(policy->governor->owner); } @@ -2110,7 +2108,6 @@ int cpufreq_register_governor(struct cpufreq_governor *governor) mutex_lock(&cpufreq_governor_mutex); - governor->initialized = 0; err = -EBUSY; if (!find_governor(governor->name)) { err = 0; diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 78faa9fbc384f8..f967ec6c572001 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -127,7 +127,6 @@ static struct notifier_block cs_cpufreq_notifier_block = { }; /************************** sysfs interface ************************/ -static struct dbs_governor cs_dbs_gov; static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, const char *buf, size_t count) @@ -255,6 +254,13 @@ static struct attribute *cs_attributes[] = { /************************** sysfs end ************************/ +struct cs_governor { + struct dbs_governor dbs_gov; + unsigned int usage_count; +}; + +static struct cs_governor cs_gov; + static struct policy_dbs_info *cs_alloc(void) { struct cs_policy_dbs_info *dbs_info; @@ -268,7 +274,7 @@ static void cs_free(struct policy_dbs_info *policy_dbs) kfree(to_dbs_info(policy_dbs)); } -static int cs_init(struct dbs_data *dbs_data, bool notify) +static int cs_init(struct dbs_data *dbs_data) { struct cs_dbs_tuners *tuners; @@ -286,16 +292,22 @@ static int cs_init(struct dbs_data *dbs_data, bool notify) dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); - if (notify) + /* + * This function and cs_exit() are only called under gov_dbs_data_mutex + * which is global, so the cs_gov.usage_count accesses are guaranteed + * to be serialized. + */ + if (!cs_gov.usage_count++) cpufreq_register_notifier(&cs_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); return 0; } -static void cs_exit(struct dbs_data *dbs_data, bool notify) +static void cs_exit(struct dbs_data *dbs_data) { - if (notify) + /* Protected by gov_dbs_data_mutex - see the comment in cs_init(). */ + if (!--cs_gov.usage_count) cpufreq_unregister_notifier(&cs_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); @@ -310,18 +322,20 @@ static void cs_start(struct cpufreq_policy *policy) dbs_info->requested_freq = policy->cur; } -static struct dbs_governor cs_dbs_gov = { - .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), - .kobj_type = { .default_attrs = cs_attributes }, - .gov_dbs_timer = cs_dbs_timer, - .alloc = cs_alloc, - .free = cs_free, - .init = cs_init, - .exit = cs_exit, - .start = cs_start, +static struct cs_governor cs_gov = { + .dbs_gov = { + .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), + .kobj_type = { .default_attrs = cs_attributes }, + .gov_dbs_timer = cs_dbs_timer, + .alloc = cs_alloc, + .free = cs_free, + .init = cs_init, + .exit = cs_exit, + .start = cs_start, + }, }; -#define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_gov.gov) +#define CPU_FREQ_GOV_CONSERVATIVE (&cs_gov.dbs_gov.gov) static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 4b88b5bec4ef6f..31369e247192fc 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -429,7 +429,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list); - ret = gov->init(dbs_data, !policy->governor->initialized); + ret = gov->init(dbs_data); if (ret) goto free_policy_dbs_info; @@ -464,7 +464,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) if (!have_governor_per_policy()) gov->gdbs_data = NULL; - gov->exit(dbs_data, !policy->governor->initialized); + gov->exit(dbs_data); kfree(dbs_data); free_policy_dbs_info: @@ -494,7 +494,7 @@ void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy) if (!have_governor_per_policy()) gov->gdbs_data = NULL; - gov->exit(dbs_data, policy->governor->initialized == 1); + gov->exit(dbs_data); kfree(dbs_data); } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 36f0d19dd8691e..ef1037e9c92b10 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -138,8 +138,8 @@ struct dbs_governor { unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); struct policy_dbs_info *(*alloc)(void); void (*free)(struct policy_dbs_info *policy_dbs); - int (*init)(struct dbs_data *dbs_data, bool notify); - void (*exit)(struct dbs_data *dbs_data, bool notify); + int (*init)(struct dbs_data *dbs_data); + void (*exit)(struct dbs_data *dbs_data); void (*start)(struct cpufreq_policy *policy); }; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 4441a11139f1a6..c84fc2240d4954 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -361,7 +361,7 @@ static void od_free(struct policy_dbs_info *policy_dbs) kfree(to_dbs_info(policy_dbs)); } -static int od_init(struct dbs_data *dbs_data, bool notify) +static int od_init(struct dbs_data *dbs_data) { struct od_dbs_tuners *tuners; u64 idle_time; @@ -400,7 +400,7 @@ static int od_init(struct dbs_data *dbs_data, bool notify) return 0; } -static void od_exit(struct dbs_data *dbs_data, bool notify) +static void od_exit(struct dbs_data *dbs_data) { kfree(dbs_data->tuners); } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 3be54a0b43732c..fbd696edf5bd35 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -457,7 +457,6 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; - int initialized; int (*init)(struct cpufreq_policy *policy); void (*exit)(struct cpufreq_policy *policy); int (*start)(struct cpufreq_policy *policy); From 388612baba201bdcedfa66782f524dcc507d9f12 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 24 May 2016 10:26:50 +0530 Subject: [PATCH 10/41] cpufreq: Send START policy notification after sending CREATE The sequence got a bit wrong as we are sending CPUFREQ_START notifications even before we have sent CPUFREQ_CREATE_POLICY. Fix it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bdf1f280ae3950..364922fda42895 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1258,9 +1258,6 @@ static int cpufreq_online(unsigned int cpu) } } - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_START, policy); - if (new_policy) { ret = cpufreq_add_dev_interface(policy); if (ret) @@ -1273,6 +1270,9 @@ static int cpufreq_online(unsigned int cpu) write_unlock_irqrestore(&cpufreq_driver_lock, flags); } + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_START, policy); + ret = cpufreq_init_policy(policy); if (ret) { pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n", From 910c6e881c3a610dfd7575b96975d4df21e3a920 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 26 May 2016 11:27:24 +0530 Subject: [PATCH 11/41] cpufreq: Use clamp_val() in __cpufreq_driver_target() Use clamp_val() instead of open coding it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 364922fda42895..198416bc22de2d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1923,10 +1923,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, return -ENODEV; /* Make sure that target_freq is within supported range */ - if (target_freq > policy->max) - target_freq = policy->max; - if (target_freq < policy->min) - target_freq = policy->min; + target_freq = clamp_val(target_freq, policy->min, policy->max); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", policy->cpu, target_freq, relation, old_target_freq); From 1aefc75b2449eb68a6fc3ca932e2a4ee353b748d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 31 May 2016 22:14:44 +0200 Subject: [PATCH 12/41] cpufreq: stats: Make the stats code non-modular The modularity of cpufreq_stats is quite problematic. First off, the usage of policy notifiers for the initialization and cleanup in the cpufreq_stats module is inherently racy with respect to CPU offline/online and the initialization and cleanup of the cpufreq driver. Second, fast frequency switching (used by the schedutil governor) cannot be enabled if any transition notifiers are registered, so if the cpufreq_stats module (that registers a transition notifier for updating transition statistics) is loaded, the schedutil governor cannot use fast frequency switching. On the other hand, allowing cpufreq_stats to be built as a module doesn't really add much value. Arguably, there's not much reason for that code to be modular at all. For the above reasons, make the cpufreq stats code non-modular, modify the core to invoke functions provided by that code directly and drop the notifiers from it. Make the stats sysfs attributes appear empty if fast frequency switching is enabled as the statistics will not be updated in that case anyway (and returning -EBUSY from those attributes breaks powertop). While at it, clean up Kconfig help for the CPU_FREQ_STAT and CPU_FREQ_STAT_DETAILS options. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/Kconfig | 13 +-- drivers/cpufreq/cpufreq.c | 4 + drivers/cpufreq/cpufreq_stats.c | 154 +++++--------------------------- include/linux/cpufreq.h | 12 +++ 4 files changed, 41 insertions(+), 142 deletions(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index b7445b6ae5a4be..c822d72629d5ca 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -31,23 +31,18 @@ config CPU_FREQ_BOOST_SW depends on THERMAL config CPU_FREQ_STAT - tristate "CPU frequency translation statistics" + bool "CPU frequency transition statistics" default y help - This driver exports CPU frequency statistics information through sysfs - file system. - - To compile this driver as a module, choose M here: the - module will be called cpufreq_stats. + Export CPU frequency statistics information through sysfs. If in doubt, say N. config CPU_FREQ_STAT_DETAILS - bool "CPU frequency translation statistics details" + bool "CPU frequency transition statistics details" depends on CPU_FREQ_STAT help - This will show detail CPU frequency translation table in sysfs file - system. + Show detailed CPU frequency transition table in sysfs. If in doubt, say N. diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 198416bc22de2d..c6a14ba239a2ef 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -347,6 +347,7 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy, pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new, (unsigned long)freqs->cpu); trace_cpu_frequency(freqs->new, freqs->cpu); + cpufreq_stats_record_transition(policy, freqs->new); srcu_notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_POSTCHANGE, freqs); if (likely(policy) && likely(policy->cpu == freqs->cpu)) @@ -1108,6 +1109,7 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy, bool notify) CPUFREQ_REMOVE_POLICY, policy); down_write(&policy->rwsem); + cpufreq_stats_free_table(policy); cpufreq_remove_dev_symlink(policy); kobj = &policy->kobj; cmp = &policy->kobj_unregister; @@ -1262,6 +1264,8 @@ static int cpufreq_online(unsigned int cpu) ret = cpufreq_add_dev_interface(policy); if (ret) goto out_exit_policy; + + cpufreq_stats_create_table(policy); blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 5e370a30a964f5..c6e7f81a039796 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -15,7 +15,7 @@ #include #include -static spinlock_t cpufreq_stats_lock; +static DEFINE_SPINLOCK(cpufreq_stats_lock); struct cpufreq_stats { unsigned int total_trans; @@ -52,6 +52,9 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) ssize_t len = 0; int i; + if (policy->fast_switch_enabled) + return 0; + cpufreq_stats_update(stats); for (i = 0; i < stats->state_num; i++) { len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i], @@ -68,6 +71,9 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) ssize_t len = 0; int i, j; + if (policy->fast_switch_enabled) + return 0; + len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n"); len += snprintf(buf + len, PAGE_SIZE - len, " : "); for (i = 0; i < stats->state_num; i++) { @@ -130,7 +136,7 @@ static int freq_table_get_index(struct cpufreq_stats *stats, unsigned int freq) return -1; } -static void __cpufreq_stats_free_table(struct cpufreq_policy *policy) +void cpufreq_stats_free_table(struct cpufreq_policy *policy) { struct cpufreq_stats *stats = policy->stats; @@ -146,20 +152,7 @@ static void __cpufreq_stats_free_table(struct cpufreq_policy *policy) policy->stats = NULL; } -static void cpufreq_stats_free_table(unsigned int cpu) -{ - struct cpufreq_policy *policy; - - policy = cpufreq_cpu_get(cpu); - if (!policy) - return; - - __cpufreq_stats_free_table(policy); - - cpufreq_cpu_put(policy); -} - -static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) +void cpufreq_stats_create_table(struct cpufreq_policy *policy) { unsigned int i = 0, count = 0, ret = -ENOMEM; struct cpufreq_stats *stats; @@ -170,15 +163,15 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) /* We need cpufreq table for creating stats table */ table = cpufreq_frequency_get_table(cpu); if (unlikely(!table)) - return 0; + return; /* stats already initialized */ if (policy->stats) - return -EEXIST; + return; stats = kzalloc(sizeof(*stats), GFP_KERNEL); if (!stats) - return -ENOMEM; + return; /* Find total allocation size */ cpufreq_for_each_valid_entry(pos, table) @@ -215,80 +208,32 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) policy->stats = stats; ret = sysfs_create_group(&policy->kobj, &stats_attr_group); if (!ret) - return 0; + return; /* We failed, release resources */ policy->stats = NULL; kfree(stats->time_in_state); free_stat: kfree(stats); - - return ret; -} - -static void cpufreq_stats_create_table(unsigned int cpu) -{ - struct cpufreq_policy *policy; - - /* - * "likely(!policy)" because normally cpufreq_stats will be registered - * before cpufreq driver - */ - policy = cpufreq_cpu_get(cpu); - if (likely(!policy)) - return; - - __cpufreq_stats_create_table(policy); - - cpufreq_cpu_put(policy); } -static int cpufreq_stat_notifier_policy(struct notifier_block *nb, - unsigned long val, void *data) +void cpufreq_stats_record_transition(struct cpufreq_policy *policy, + unsigned int new_freq) { - int ret = 0; - struct cpufreq_policy *policy = data; - - if (val == CPUFREQ_CREATE_POLICY) - ret = __cpufreq_stats_create_table(policy); - else if (val == CPUFREQ_REMOVE_POLICY) - __cpufreq_stats_free_table(policy); - - return ret; -} - -static int cpufreq_stat_notifier_trans(struct notifier_block *nb, - unsigned long val, void *data) -{ - struct cpufreq_freqs *freq = data; - struct cpufreq_policy *policy = cpufreq_cpu_get(freq->cpu); - struct cpufreq_stats *stats; + struct cpufreq_stats *stats = policy->stats; int old_index, new_index; - if (!policy) { - pr_err("%s: No policy found\n", __func__); - return 0; - } - - if (val != CPUFREQ_POSTCHANGE) - goto put_policy; - - if (!policy->stats) { + if (!stats) { pr_debug("%s: No stats found\n", __func__); - goto put_policy; + return; } - stats = policy->stats; - old_index = stats->last_index; - new_index = freq_table_get_index(stats, freq->new); + new_index = freq_table_get_index(stats, new_freq); /* We can't do stats->time_in_state[-1]= .. */ - if (old_index == -1 || new_index == -1) - goto put_policy; - - if (old_index == new_index) - goto put_policy; + if (old_index == -1 || new_index == -1 || old_index == new_index) + return; cpufreq_stats_update(stats); @@ -297,61 +242,4 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb, stats->trans_table[old_index * stats->max_state + new_index]++; #endif stats->total_trans++; - -put_policy: - cpufreq_cpu_put(policy); - return 0; } - -static struct notifier_block notifier_policy_block = { - .notifier_call = cpufreq_stat_notifier_policy -}; - -static struct notifier_block notifier_trans_block = { - .notifier_call = cpufreq_stat_notifier_trans -}; - -static int __init cpufreq_stats_init(void) -{ - int ret; - unsigned int cpu; - - spin_lock_init(&cpufreq_stats_lock); - ret = cpufreq_register_notifier(¬ifier_policy_block, - CPUFREQ_POLICY_NOTIFIER); - if (ret) - return ret; - - for_each_online_cpu(cpu) - cpufreq_stats_create_table(cpu); - - ret = cpufreq_register_notifier(¬ifier_trans_block, - CPUFREQ_TRANSITION_NOTIFIER); - if (ret) { - cpufreq_unregister_notifier(¬ifier_policy_block, - CPUFREQ_POLICY_NOTIFIER); - for_each_online_cpu(cpu) - cpufreq_stats_free_table(cpu); - return ret; - } - - return 0; -} -static void __exit cpufreq_stats_exit(void) -{ - unsigned int cpu; - - cpufreq_unregister_notifier(¬ifier_policy_block, - CPUFREQ_POLICY_NOTIFIER); - cpufreq_unregister_notifier(¬ifier_trans_block, - CPUFREQ_TRANSITION_NOTIFIER); - for_each_online_cpu(cpu) - cpufreq_stats_free_table(cpu); -} - -MODULE_AUTHOR("Zou Nan hai "); -MODULE_DESCRIPTION("Export cpufreq stats via sysfs"); -MODULE_LICENSE("GPL"); - -module_init(cpufreq_stats_init); -module_exit(cpufreq_stats_exit); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index fbd696edf5bd35..7ed93c310c0887 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -185,6 +185,18 @@ static inline unsigned int cpufreq_quick_get_max(unsigned int cpu) static inline void disable_cpufreq(void) { } #endif +#ifdef CONFIG_CPU_FREQ_STAT +void cpufreq_stats_create_table(struct cpufreq_policy *policy); +void cpufreq_stats_free_table(struct cpufreq_policy *policy); +void cpufreq_stats_record_transition(struct cpufreq_policy *policy, + unsigned int new_freq); +#else +static inline void cpufreq_stats_create_table(struct cpufreq_policy *policy) { } +static inline void cpufreq_stats_free_table(struct cpufreq_policy *policy) { } +static inline void cpufreq_stats_record_transition(struct cpufreq_policy *policy, + unsigned int new_freq) { } +#endif /* CONFIG_CPU_FREQ_STAT */ + /********************************************************************* * CPUFREQ DRIVER INTERFACE * *********************************************************************/ From 64bf55a72f550ad33f4053493717f51efe632e9f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 31 May 2016 16:50:23 +0530 Subject: [PATCH 13/41] cpufreq: Unexport cpufreq_frequency_table_cpuinfo() All cpufreq drivers with a freq-table are migrated to use cpufreq_table_validate_and_show() long back and the routine cpufreq_frequency_table_cpuinfo() isn't used outside of cpufreq core now. Unexport it and update Documentation as well. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/core.txt | 4 ++-- Documentation/cpu-freq/cpu-drivers.txt | 2 +- drivers/cpufreq/freq_table.c | 2 -- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt index ba78e7c2a0697e..4bc7287806de46 100644 --- a/Documentation/cpu-freq/core.txt +++ b/Documentation/cpu-freq/core.txt @@ -96,7 +96,7 @@ new - new frequency For details about OPP, see Documentation/power/opp.txt dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with - cpufreq_frequency_table_cpuinfo which is provided with the list of + cpufreq_table_validate_and_show() which is provided with the list of frequencies that are available for operation. This function provides a ready to use conversion routine to translate the OPP layer's internal information about the available frequencies into a format readily @@ -110,7 +110,7 @@ dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with /* Do things */ r = dev_pm_opp_init_cpufreq_table(dev, &freq_table); if (!r) - cpufreq_frequency_table_cpuinfo(policy, freq_table); + cpufreq_table_validate_and_show(policy, freq_table); /* Do other things */ } diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index 14f4e6336d88b7..decbb8cb55733a 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt @@ -231,7 +231,7 @@ if you want to skip one entry in the table, set the frequency to CPUFREQ_ENTRY_INVALID. The entries don't need to be in ascending order. -By calling cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, +By calling cpufreq_table_validate_and_show(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); the cpuinfo.min_freq and cpuinfo.max_freq values are detected, and policy->min and policy->max are set to the same values. This is diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index a8f1daffc9bcbf..4e5c5dbfed7a5a 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -63,8 +63,6 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, else return 0; } -EXPORT_SYMBOL_GPL(cpufreq_frequency_table_cpuinfo); - int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table) From f0f879ba533ed29ca277c95d529fce703113271e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 3 Jun 2016 10:58:46 +0530 Subject: [PATCH 14/41] cpufreq: s3c24xx: Remove useless checks These aren't required at all, remove them. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/s3c24xx-cpufreq.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c index ae8eaed77b7074..4567c3cab0956b 100644 --- a/drivers/cpufreq/s3c24xx-cpufreq.c +++ b/drivers/cpufreq/s3c24xx-cpufreq.c @@ -571,11 +571,7 @@ static int s3c_cpufreq_build_freq(void) { int size, ret; - if (!cpu_cur.info->calc_freqtable) - return -EINVAL; - kfree(ftab); - ftab = NULL; size = cpu_cur.info->calc_freqtable(&cpu_cur, NULL, 0); size++; From f8bfc116cacbdf7e0e655d8a798a242087ed70a5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 3 Jun 2016 10:58:47 +0530 Subject: [PATCH 15/41] cpufreq: Remove cpufreq_frequency_get_table() Most of the callers of cpufreq_frequency_get_table() already have the pointer to a valid 'policy' structure and they don't really need to go through the per-cpu variable first and then a check to validate the frequency, in order to find the freq-table for the policy. Directly use the policy->freq_table field instead for them. Only one user of that API is left after above changes, cpu_cooling.c and it accesses the freq_table in a racy way as the policy can get freed in between. Fix it by using cpufreq_cpu_get() properly. Since there are no more users of cpufreq_frequency_get_table() left, get rid of it. Signed-off-by: Viresh Kumar Acked-by: Javi Merino (cpu_cooling.c) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 38 ++++++++++----------------- drivers/cpufreq/cpufreq_ondemand.c | 2 +- drivers/cpufreq/cpufreq_stats.c | 3 +-- drivers/cpufreq/freq_table.c | 9 +++---- drivers/cpufreq/ppc_cbe_cpufreq_pmi.c | 3 +-- drivers/thermal/cpu_cooling.c | 24 +++++++++++++---- include/linux/cpufreq.h | 2 -- 7 files changed, 39 insertions(+), 42 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c6a14ba239a2ef..cc252eecc45a4b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -126,15 +126,6 @@ struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(get_governor_parent_kobj); -struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu) -{ - struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); - - return policy && !policy_is_inactive(policy) ? - policy->freq_table : NULL; -} -EXPORT_SYMBOL_GPL(cpufreq_frequency_get_table); - static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) { u64 idle_time; @@ -1950,7 +1941,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (!cpufreq_driver->target_index) return -EINVAL; - freq_table = cpufreq_frequency_get_table(policy->cpu); + freq_table = policy->freq_table; if (unlikely(!freq_table)) { pr_err("%s: Unable to find freq_table\n", __func__); return -EINVAL; @@ -2345,26 +2336,25 @@ static struct notifier_block __refdata cpufreq_cpu_notifier = { *********************************************************************/ static int cpufreq_boost_set_sw(int state) { - struct cpufreq_frequency_table *freq_table; struct cpufreq_policy *policy; int ret = -EINVAL; for_each_active_policy(policy) { - freq_table = cpufreq_frequency_get_table(policy->cpu); - if (freq_table) { - ret = cpufreq_frequency_table_cpuinfo(policy, - freq_table); - if (ret) { - pr_err("%s: Policy frequency update failed\n", - __func__); - break; - } + if (!policy->freq_table) + continue; - down_write(&policy->rwsem); - policy->user_policy.max = policy->max; - cpufreq_governor_limits(policy); - up_write(&policy->rwsem); + ret = cpufreq_frequency_table_cpuinfo(policy, + policy->freq_table); + if (ret) { + pr_err("%s: Policy frequency update failed\n", + __func__); + break; } + + down_write(&policy->rwsem); + policy->user_policy.max = policy->max; + cpufreq_governor_limits(policy); + up_write(&policy->rwsem); } return ret; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index c84fc2240d4954..4d2fe2710c5d79 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -113,7 +113,7 @@ static void ondemand_powersave_bias_init(struct cpufreq_policy *policy) { struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); - dbs_info->freq_table = cpufreq_frequency_get_table(policy->cpu); + dbs_info->freq_table = policy->freq_table; dbs_info->freq_lo = 0; } diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index c6e7f81a039796..06d3abdffd3a39 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -157,11 +157,10 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy) unsigned int i = 0, count = 0, ret = -ENOMEM; struct cpufreq_stats *stats; unsigned int alloc_size; - unsigned int cpu = policy->cpu; struct cpufreq_frequency_table *pos, *table; /* We need cpufreq table for creating stats table */ - table = cpufreq_frequency_get_table(cpu); + table = policy->freq_table; if (unlikely(!table)) return; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 4e5c5dbfed7a5a..f52b5473b1f46e 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -106,12 +106,10 @@ EXPORT_SYMBOL_GPL(cpufreq_frequency_table_verify); */ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy) { - struct cpufreq_frequency_table *table = - cpufreq_frequency_get_table(policy->cpu); - if (!table) + if (!policy->freq_table) return -ENODEV; - return cpufreq_frequency_table_verify(policy, table); + return cpufreq_frequency_table_verify(policy, policy->freq_table); } EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); @@ -210,9 +208,8 @@ EXPORT_SYMBOL_GPL(cpufreq_frequency_table_target); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq) { - struct cpufreq_frequency_table *pos, *table; + struct cpufreq_frequency_table *pos, *table = policy->freq_table; - table = cpufreq_frequency_get_table(policy->cpu); if (unlikely(!table)) { pr_debug("%s: Unable to find frequency table\n", __func__); return -ENOENT; diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index 7c4cd5c634f230..dc112481a40841 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -94,7 +94,7 @@ static int pmi_notifier(struct notifier_block *nb, unsigned long event, void *data) { struct cpufreq_policy *policy = data; - struct cpufreq_frequency_table *cbe_freqs; + struct cpufreq_frequency_table *cbe_freqs = policy->freq_table; u8 node; /* Should this really be called for CPUFREQ_ADJUST and CPUFREQ_NOTIFY @@ -103,7 +103,6 @@ static int pmi_notifier(struct notifier_block *nb, if (event == CPUFREQ_START) return 0; - cbe_freqs = cpufreq_frequency_get_table(policy->cpu); node = cbe_cpu_to_node(policy->cpu); pr_debug("got notified, event=%lu, node=%u\n", event, node); diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 6ceac4f2d4b227..4d678cfc81b19a 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -787,22 +787,34 @@ __cpufreq_cooling_register(struct device_node *np, const struct cpumask *clip_cpus, u32 capacitance, get_static_t plat_static_func) { + struct cpufreq_policy *policy; struct thermal_cooling_device *cool_dev; struct cpufreq_cooling_device *cpufreq_dev; char dev_name[THERMAL_NAME_LENGTH]; struct cpufreq_frequency_table *pos, *table; + struct cpumask temp_mask; unsigned int freq, i, num_cpus; int ret; - table = cpufreq_frequency_get_table(cpumask_first(clip_cpus)); + cpumask_and(&temp_mask, clip_cpus, cpu_online_mask); + policy = cpufreq_cpu_get(cpumask_first(&temp_mask)); + if (!policy) { + pr_debug("%s: CPUFreq policy not found\n", __func__); + return ERR_PTR(-EPROBE_DEFER); + } + + table = policy->freq_table; if (!table) { pr_debug("%s: CPUFreq table not found\n", __func__); - return ERR_PTR(-EPROBE_DEFER); + cool_dev = ERR_PTR(-ENODEV); + goto put_policy; } cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL); - if (!cpufreq_dev) - return ERR_PTR(-ENOMEM); + if (!cpufreq_dev) { + cool_dev = ERR_PTR(-ENOMEM); + goto put_policy; + } num_cpus = cpumask_weight(clip_cpus); cpufreq_dev->time_in_idle = kcalloc(num_cpus, @@ -892,7 +904,7 @@ __cpufreq_cooling_register(struct device_node *np, CPUFREQ_POLICY_NOTIFIER); mutex_unlock(&cooling_cpufreq_lock); - return cool_dev; + goto put_policy; remove_idr: release_idr(&cpufreq_idr, cpufreq_dev->id); @@ -906,6 +918,8 @@ __cpufreq_cooling_register(struct device_node *np, kfree(cpufreq_dev->time_in_idle); free_cdev: kfree(cpufreq_dev); +put_policy: + cpufreq_cpu_put(policy); return cool_dev; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7ed93c310c0887..1342cbc0f25e71 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -632,8 +632,6 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) return false; } #endif -/* the following funtion is for cpufreq core use only */ -struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu); /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; From 34ac5d7a1d1da203008697e1a69cb4bdbff8684c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 3 Jun 2016 10:58:48 +0530 Subject: [PATCH 16/41] cpufreq: ondemand: Don't keep a copy of freq_table pointer There is absolutely no need to keep a copy to the freq-table in 'struct od_policy_dbs_info'. Use policy->freq_table instead. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd_freq_sensitivity.c | 7 +++---- drivers/cpufreq/cpufreq_ondemand.c | 22 +++++++++++----------- drivers/cpufreq/cpufreq_ondemand.h | 1 - 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 404360cad25c4c..bc86816693a855 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -48,9 +48,8 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *od_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = od_data->tuners; - struct od_policy_dbs_info *od_info = to_dbs_info(policy_dbs); - if (!od_info->freq_table) + if (!policy->freq_table) return freq_next; rdmsr_on_cpu(policy->cpu, MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, @@ -93,9 +92,9 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, unsigned int index; cpufreq_frequency_table_target(policy, - od_info->freq_table, policy->cur - 1, + policy->freq_table, policy->cur - 1, CPUFREQ_RELATION_H, &index); - freq_next = od_info->freq_table[index].frequency; + freq_next = policy->freq_table[index].frequency; } data->freq_prev = freq_next; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 4d2fe2710c5d79..528353f204fd7e 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -71,28 +71,29 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; + struct cpufreq_frequency_table *freq_table = policy->freq_table; - if (!dbs_info->freq_table) { + if (!freq_table) { dbs_info->freq_lo = 0; dbs_info->freq_lo_delay_us = 0; return freq_next; } - cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, - relation, &index); - freq_req = dbs_info->freq_table[index].frequency; + cpufreq_frequency_table_target(policy, freq_table, freq_next, relation, + &index); + freq_req = freq_table[index].frequency; freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; /* Find freq bounds for freq_avg in freq_table */ index = 0; - cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, - CPUFREQ_RELATION_H, &index); - freq_lo = dbs_info->freq_table[index].frequency; + cpufreq_frequency_table_target(policy, freq_table, freq_avg, + CPUFREQ_RELATION_H, &index); + freq_lo = freq_table[index].frequency; index = 0; - cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, - CPUFREQ_RELATION_L, &index); - freq_hi = dbs_info->freq_table[index].frequency; + cpufreq_frequency_table_target(policy, freq_table, freq_avg, + CPUFREQ_RELATION_L, &index); + freq_hi = freq_table[index].frequency; /* Find out how long we have to be in hi and lo freqs */ if (freq_hi == freq_lo) { @@ -113,7 +114,6 @@ static void ondemand_powersave_bias_init(struct cpufreq_policy *policy) { struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); - dbs_info->freq_table = policy->freq_table; dbs_info->freq_lo = 0; } diff --git a/drivers/cpufreq/cpufreq_ondemand.h b/drivers/cpufreq/cpufreq_ondemand.h index f0121db3cd9eda..640ea4e9710623 100644 --- a/drivers/cpufreq/cpufreq_ondemand.h +++ b/drivers/cpufreq/cpufreq_ondemand.h @@ -13,7 +13,6 @@ struct od_policy_dbs_info { struct policy_dbs_info policy_dbs; - struct cpufreq_frequency_table *freq_table; unsigned int freq_lo; unsigned int freq_lo_delay_us; unsigned int freq_hi_delay_us; From 7ab4aabbaa98fbf9f8967fd93162d675439337e1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 3 Jun 2016 10:58:49 +0530 Subject: [PATCH 17/41] cpufreq: Drop freq-table param to cpufreq_frequency_table_target() The policy already has this pointer set, use it instead. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/cpu-drivers.txt | 1 - drivers/cpufreq/amd_freq_sensitivity.c | 3 +-- drivers/cpufreq/cpufreq.c | 4 ++-- drivers/cpufreq/cpufreq_ondemand.c | 11 +++++------ drivers/cpufreq/freq_table.c | 2 +- drivers/cpufreq/powernv-cpufreq.c | 3 +-- drivers/cpufreq/s3c24xx-cpufreq.c | 11 +++++------ drivers/cpufreq/s5pv210-cpufreq.c | 3 +-- include/linux/cpufreq.h | 1 - 9 files changed, 16 insertions(+), 23 deletions(-) diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index decbb8cb55733a..74e812f0719c4b 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt @@ -244,7 +244,6 @@ policy->max, and all other criteria are met. This is helpful for the ->verify call. int cpufreq_frequency_table_target(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *table, unsigned int target_freq, unsigned int relation, unsigned int *index); diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index bc86816693a855..3bea1bb791a941 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -92,8 +92,7 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, unsigned int index; cpufreq_frequency_table_target(policy, - policy->freq_table, policy->cur - 1, - CPUFREQ_RELATION_H, &index); + policy->cur - 1, CPUFREQ_RELATION_H, &index); freq_next = policy->freq_table[index].frequency; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index cc252eecc45a4b..a281446971289c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1947,8 +1947,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, return -EINVAL; } - retval = cpufreq_frequency_table_target(policy, freq_table, target_freq, - relation, &index); + retval = cpufreq_frequency_table_target(policy, target_freq, relation, + &index); if (unlikely(retval)) { pr_err("%s: Unable to find matching freq\n", __func__); return retval; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 528353f204fd7e..2ee476f5a2bdb2 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -79,20 +79,19 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, return freq_next; } - cpufreq_frequency_table_target(policy, freq_table, freq_next, relation, - &index); + cpufreq_frequency_table_target(policy, freq_next, relation, &index); freq_req = freq_table[index].frequency; freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; /* Find freq bounds for freq_avg in freq_table */ index = 0; - cpufreq_frequency_table_target(policy, freq_table, freq_avg, - CPUFREQ_RELATION_H, &index); + cpufreq_frequency_table_target(policy, freq_avg, CPUFREQ_RELATION_H, + &index); freq_lo = freq_table[index].frequency; index = 0; - cpufreq_frequency_table_target(policy, freq_table, freq_avg, - CPUFREQ_RELATION_L, &index); + cpufreq_frequency_table_target(policy, freq_avg, CPUFREQ_RELATION_L, + &index); freq_hi = freq_table[index].frequency; /* Find out how long we have to be in hi and lo freqs */ diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index f52b5473b1f46e..f145b64649ef76 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -114,7 +114,6 @@ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy) EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); int cpufreq_frequency_table_target(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *table, unsigned int target_freq, unsigned int relation, unsigned int *index) @@ -128,6 +127,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, .frequency = 0, }; struct cpufreq_frequency_table *pos; + struct cpufreq_frequency_table *table = policy->freq_table; unsigned int freq, diff, i = 0; pr_debug("request for target %u kHz (relation: %u) for cpu %u\n", diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 54c45368e3f17d..bf267c2dfe2095 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -760,8 +760,7 @@ void powernv_cpufreq_work_fn(struct work_struct *work) struct cpufreq_policy policy; cpufreq_get_policy(&policy, cpu); - cpufreq_frequency_table_target(&policy, policy.freq_table, - policy.cur, + cpufreq_frequency_table_target(&policy, policy.cur, CPUFREQ_RELATION_C, &index); powernv_cpufreq_target_index(&policy, index); cpumask_andnot(&mask, &mask, policy.cpus); diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c index 4567c3cab0956b..05a9737278f376 100644 --- a/drivers/cpufreq/s3c24xx-cpufreq.c +++ b/drivers/cpufreq/s3c24xx-cpufreq.c @@ -293,9 +293,8 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, __func__, policy, target_freq, relation); if (ftab) { - if (cpufreq_frequency_table_target(policy, ftab, - target_freq, relation, - &index)) { + if (cpufreq_frequency_table_target(policy, target_freq, + relation, &index)) { s3c_freq_dbg("%s: table failed\n", __func__); return -EINVAL; } @@ -323,14 +322,14 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, tmp_policy.min = policy->min * 1000; tmp_policy.max = policy->max * 1000; tmp_policy.cpu = policy->cpu; + tmp_policy.freq_table = pll_reg; /* cpufreq_frequency_table_target uses a pointer to 'index' * which is the number of the table entry, not the value of * the table entry's index field. */ - ret = cpufreq_frequency_table_target(&tmp_policy, pll_reg, - target_freq, relation, - &index); + ret = cpufreq_frequency_table_target(&tmp_policy, target_freq, + relation, &index); if (ret < 0) { pr_err("%s: no PLL available\n", __func__); diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 06d85917b6d5d3..6b0cfc3b8c466c 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -246,8 +246,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index) new_freq = s5pv210_freq_table[index].frequency; /* Finding current running level index */ - if (cpufreq_frequency_table_target(policy, s5pv210_freq_table, - old_freq, CPUFREQ_RELATION_H, + if (cpufreq_frequency_table_target(policy, old_freq, CPUFREQ_RELATION_H, &priv_index)) { ret = -EINVAL; goto exit; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1342cbc0f25e71..bdd7f0c035ae32 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -598,7 +598,6 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy); int cpufreq_frequency_table_target(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *table, unsigned int target_freq, unsigned int relation, unsigned int *index); From 2372784542cb7a8b256a740840abf4503de0d669 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 3 Jun 2016 10:58:50 +0530 Subject: [PATCH 18/41] cpufreq: Drop 'freq_table' argument of __target_index() It is already present as part of the policy and so no need to pass it from the caller. Also, 'freq_table' is guaranteed to be valid in this function and so no need to check it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a281446971289c..1685e930770f4f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1852,14 +1852,17 @@ static int __target_intermediate(struct cpufreq_policy *policy, return ret; } -static int __target_index(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *freq_table, int index) +static int __target_index(struct cpufreq_policy *policy, int index) { struct cpufreq_freqs freqs = {.old = policy->cur, .flags = 0}; unsigned int intermediate_freq = 0; + unsigned int newfreq = policy->freq_table[index].frequency; int retval = -EINVAL; bool notify; + if (newfreq == policy->cur) + return 0; + notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION); if (notify) { /* Handle switching to intermediate frequency */ @@ -1874,7 +1877,7 @@ static int __target_index(struct cpufreq_policy *policy, freqs.old = freqs.new; } - freqs.new = freq_table[index].frequency; + freqs.new = newfreq; pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n", __func__, policy->cpu, freqs.old, freqs.new); @@ -1911,7 +1914,6 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation) { unsigned int old_target_freq = target_freq; - struct cpufreq_frequency_table *freq_table; int index, retval; if (cpufreq_disabled()) @@ -1941,12 +1943,6 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (!cpufreq_driver->target_index) return -EINVAL; - freq_table = policy->freq_table; - if (unlikely(!freq_table)) { - pr_err("%s: Unable to find freq_table\n", __func__); - return -EINVAL; - } - retval = cpufreq_frequency_table_target(policy, target_freq, relation, &index); if (unlikely(retval)) { @@ -1954,10 +1950,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, return retval; } - if (freq_table[index].frequency == policy->cur) - return 0; - - return __target_index(policy, freq_table, index); + return __target_index(policy, index); } EXPORT_SYMBOL_GPL(__cpufreq_driver_target); From d218ed773915a2b762d132be6bb765637338c360 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 3 Jun 2016 10:58:51 +0530 Subject: [PATCH 19/41] cpufreq: Return index from cpufreq_frequency_table_target() This routine can't fail unless the frequency table is invalid and doesn't contain any valid entries. Make it return the index and WARN() in case it is used for an invalid table. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/cpu-drivers.txt | 7 +++---- drivers/cpufreq/amd_freq_sensitivity.c | 4 ++-- drivers/cpufreq/cpufreq.c | 9 ++------- drivers/cpufreq/cpufreq_ondemand.c | 14 ++++++-------- drivers/cpufreq/freq_table.c | 24 +++++++++++++----------- drivers/cpufreq/powernv-cpufreq.c | 4 ++-- drivers/cpufreq/s3c24xx-cpufreq.c | 26 ++++++-------------------- drivers/cpufreq/s5pv210-cpufreq.c | 7 ++----- include/linux/cpufreq.h | 3 +-- 9 files changed, 37 insertions(+), 61 deletions(-) diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index 74e812f0719c4b..772b94fde2640a 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt @@ -245,12 +245,11 @@ policy->max, and all other criteria are met. This is helpful for the int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, - unsigned int relation, - unsigned int *index); + unsigned int relation); is the corresponding frequency table helper for the ->target -stage. Just pass the values to this function, and the unsigned int -index returns the number of the frequency table entry which contains +stage. Just pass the values to this function, and this function +returns the number of the frequency table entry which contains the frequency the CPU shall be set to. The following macros can be used as iterators over cpufreq_frequency_table: diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 3bea1bb791a941..6d5dc04c3a3762 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -91,8 +91,8 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, else { unsigned int index; - cpufreq_frequency_table_target(policy, - policy->cur - 1, CPUFREQ_RELATION_H, &index); + index = cpufreq_frequency_table_target(policy, + policy->cur - 1, CPUFREQ_RELATION_H); freq_next = policy->freq_table[index].frequency; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1685e930770f4f..07c933c6c29a16 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1914,7 +1914,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation) { unsigned int old_target_freq = target_freq; - int index, retval; + int index; if (cpufreq_disabled()) return -ENODEV; @@ -1943,12 +1943,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (!cpufreq_driver->target_index) return -EINVAL; - retval = cpufreq_frequency_table_target(policy, target_freq, relation, - &index); - if (unlikely(retval)) { - pr_err("%s: Unable to find matching freq\n", __func__); - return retval; - } + index = cpufreq_frequency_table_target(policy, target_freq, relation); return __target_index(policy, index); } diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 2ee476f5a2bdb2..0c93cd9dee996f 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -65,7 +65,7 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, { unsigned int freq_req, freq_reduc, freq_avg; unsigned int freq_hi, freq_lo; - unsigned int index = 0; + unsigned int index; unsigned int delay_hi_us; struct policy_dbs_info *policy_dbs = policy->governor_data; struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); @@ -79,19 +79,17 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, return freq_next; } - cpufreq_frequency_table_target(policy, freq_next, relation, &index); + index = cpufreq_frequency_table_target(policy, freq_next, relation); freq_req = freq_table[index].frequency; freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; /* Find freq bounds for freq_avg in freq_table */ - index = 0; - cpufreq_frequency_table_target(policy, freq_avg, CPUFREQ_RELATION_H, - &index); + index = cpufreq_frequency_table_target(policy, freq_avg, + CPUFREQ_RELATION_H); freq_lo = freq_table[index].frequency; - index = 0; - cpufreq_frequency_table_target(policy, freq_avg, CPUFREQ_RELATION_L, - &index); + index = cpufreq_frequency_table_target(policy, freq_avg, + CPUFREQ_RELATION_L); freq_hi = freq_table[index].frequency; /* Find out how long we have to be in hi and lo freqs */ diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index f145b64649ef76..eac8bcbdaad1b1 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -114,9 +114,8 @@ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy) EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); int cpufreq_frequency_table_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation, - unsigned int *index) + unsigned int target_freq, + unsigned int relation) { struct cpufreq_frequency_table optimal = { .driver_data = ~0, @@ -129,6 +128,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, struct cpufreq_frequency_table *pos; struct cpufreq_frequency_table *table = policy->freq_table; unsigned int freq, diff, i = 0; + int index; pr_debug("request for target %u kHz (relation: %u) for cpu %u\n", target_freq, relation, policy->cpu); @@ -192,16 +192,18 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, } } if (optimal.driver_data > i) { - if (suboptimal.driver_data > i) - return -EINVAL; - *index = suboptimal.driver_data; - } else - *index = optimal.driver_data; + if (suboptimal.driver_data > i) { + WARN(1, "Invalid frequency table: %d\n", policy->cpu); + return 0; + } - pr_debug("target index is %u, freq is:%u kHz\n", *index, - table[*index].frequency); + index = suboptimal.driver_data; + } else + index = optimal.driver_data; - return 0; + pr_debug("target index is %u, freq is:%u kHz\n", index, + table[index].frequency); + return index; } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_target); diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index bf267c2dfe2095..b29c5c20c3a1c5 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -760,8 +760,8 @@ void powernv_cpufreq_work_fn(struct work_struct *work) struct cpufreq_policy policy; cpufreq_get_policy(&policy, cpu); - cpufreq_frequency_table_target(&policy, policy.cur, - CPUFREQ_RELATION_C, &index); + index = cpufreq_frequency_table_target(&policy, policy.cur, + CPUFREQ_RELATION_C); powernv_cpufreq_target_index(&policy, index); cpumask_andnot(&mask, &mask, policy.cpus); } diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c index 05a9737278f376..7b596fa38ad2de 100644 --- a/drivers/cpufreq/s3c24xx-cpufreq.c +++ b/drivers/cpufreq/s3c24xx-cpufreq.c @@ -293,11 +293,8 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, __func__, policy, target_freq, relation); if (ftab) { - if (cpufreq_frequency_table_target(policy, target_freq, - relation, &index)) { - s3c_freq_dbg("%s: table failed\n", __func__); - return -EINVAL; - } + index = cpufreq_frequency_table_target(policy, target_freq, + relation); s3c_freq_dbg("%s: adjust %d to entry %d (%u)\n", __func__, target_freq, index, ftab[index].frequency); @@ -314,7 +311,6 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, pll = NULL; } else { struct cpufreq_policy tmp_policy; - int ret; /* we keep the cpu pll table in Hz, to ensure we get an * accurate value for the PLL output. */ @@ -324,18 +320,12 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, tmp_policy.cpu = policy->cpu; tmp_policy.freq_table = pll_reg; - /* cpufreq_frequency_table_target uses a pointer to 'index' - * which is the number of the table entry, not the value of + /* cpufreq_frequency_table_target returns the index + * of the table entry, not the value of * the table entry's index field. */ - ret = cpufreq_frequency_table_target(&tmp_policy, target_freq, - relation, &index); - - if (ret < 0) { - pr_err("%s: no PLL available\n", __func__); - goto err_notpossible; - } - + index = cpufreq_frequency_table_target(&tmp_policy, target_freq, + relation); pll = pll_reg + index; s3c_freq_dbg("%s: target %u => %u\n", @@ -345,10 +335,6 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, } return s3c_cpufreq_settarget(policy, target_freq, pll); - - err_notpossible: - pr_err("no compatible settings for %d\n", target_freq); - return -EINVAL; } struct clk *s3c_cpufreq_clk_get(struct device *dev, const char *name) diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 6b0cfc3b8c466c..4f4e9df9b7fc99 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -246,11 +246,8 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index) new_freq = s5pv210_freq_table[index].frequency; /* Finding current running level index */ - if (cpufreq_frequency_table_target(policy, old_freq, CPUFREQ_RELATION_H, - &priv_index)) { - ret = -EINVAL; - goto exit; - } + priv_index = cpufreq_frequency_table_target(policy, old_freq, + CPUFREQ_RELATION_H); arm_volt = dvs_conf[index].arm_volt; int_volt = dvs_conf[index].int_volt; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index bdd7f0c035ae32..c378776628b4b5 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -599,8 +599,7 @@ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy); int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, - unsigned int relation, - unsigned int *index); + unsigned int relation); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq); From 9d4de2904ef0add3006df8896bfd07f534c44dd8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 3 Jun 2016 10:58:52 +0530 Subject: [PATCH 20/41] cpufreq: davinci: Reuse cpufreq_generic_frequency_table_verify() policy->freq_table will always be valid for this platform, otherwise driver's probe() would fail. And so this routine will *always* return after calling cpufreq_frequency_table_verify(). This can be done using the generic callback provided by core, lets use it instead. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/davinci-cpufreq.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c index 7e336d20c184b4..b95a872800ecf0 100644 --- a/drivers/cpufreq/davinci-cpufreq.c +++ b/drivers/cpufreq/davinci-cpufreq.c @@ -38,26 +38,6 @@ struct davinci_cpufreq { }; static struct davinci_cpufreq cpufreq; -static int davinci_verify_speed(struct cpufreq_policy *policy) -{ - struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data; - struct cpufreq_frequency_table *freq_table = pdata->freq_table; - struct clk *armclk = cpufreq.armclk; - - if (freq_table) - return cpufreq_frequency_table_verify(policy, freq_table); - - if (policy->cpu) - return -EINVAL; - - cpufreq_verify_within_cpu_limits(policy); - policy->min = clk_round_rate(armclk, policy->min * 1000) / 1000; - policy->max = clk_round_rate(armclk, policy->max * 1000) / 1000; - cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); - return 0; -} - static int davinci_target(struct cpufreq_policy *policy, unsigned int idx) { struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data; @@ -121,7 +101,7 @@ static int davinci_cpu_init(struct cpufreq_policy *policy) static struct cpufreq_driver davinci_driver = { .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, - .verify = davinci_verify_speed, + .verify = cpufreq_generic_frequency_table_verify, .target_index = davinci_target, .get = cpufreq_generic_get, .init = davinci_cpu_init, From f6709b8aa78fb6765c443ad6b70fdaf48b89d95d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 9 Jun 2016 01:45:32 +0200 Subject: [PATCH 21/41] cpufreq: governor: Drop gov_cancel_work() There's no reason for gov_cancel_work() to exist at all, as it only has one caller and the only thing done by that caller is to invoke gov_cancel_work(). Accordingly, drop gov_cancel_work() and move its contents to the caller. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 31369e247192fc..e415349ab31b63 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -336,17 +336,6 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) synchronize_sched(); } -static void gov_cancel_work(struct cpufreq_policy *policy) -{ - struct policy_dbs_info *policy_dbs = policy->governor_data; - - gov_clear_update_util(policy_dbs->policy); - irq_work_sync(&policy_dbs->irq_work); - cancel_work_sync(&policy_dbs->work); - atomic_set(&policy_dbs->work_count, 0); - policy_dbs->work_in_progress = false; -} - static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, struct dbs_governor *gov) { @@ -544,7 +533,13 @@ EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_start); void cpufreq_dbs_governor_stop(struct cpufreq_policy *policy) { - gov_cancel_work(policy); + struct policy_dbs_info *policy_dbs = policy->governor_data; + + gov_clear_update_util(policy_dbs->policy); + irq_work_sync(&policy_dbs->irq_work); + cancel_work_sync(&policy_dbs->work); + atomic_set(&policy_dbs->work_count, 0); + policy_dbs->work_in_progress = false; } EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_stop); From d352cf47d93e39494b44b792cca8d35a3a0bd9b3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 13 Jun 2016 23:33:49 +0200 Subject: [PATCH 22/41] cpufreq: conservative: Do not use transition notifications The conservative governor registers a transition notifier so it can update its internal requested_freq value if it falls out of the policy->min...policy->max range, but requested_freq is not really necessary. That value is used to track the frequency requested by the governor previously, but policy->cur can be used instead of it and then the governor will not have to worry about updating the tracked value when the current frequency changes independently (for example, as a result of min or max changes). Accodringly, drop requested_freq from struct cs_policy_dbs_info and modify cs_dbs_timer() to use policy->cur instead of it. While at it, notice that __cpufreq_driver_target() clamps its target_freq argument between policy->min and policy->max, so the callers of it don't have to do that and make additional changes in cs_dbs_timer() in accordance with that. After these changes the transition notifier used by the conservative governor is not necessary any more, so drop it, which also makes it possible to drop the struct cs_governor definition and simplify the code accordingly. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 103 +++++-------------------- 1 file changed, 21 insertions(+), 82 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index f967ec6c572001..18da4f8051d372 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -17,7 +17,6 @@ struct cs_policy_dbs_info { struct policy_dbs_info policy_dbs; unsigned int down_skip; - unsigned int requested_freq; }; static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs) @@ -75,19 +74,17 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) /* Check for frequency increase */ if (load > dbs_data->up_threshold) { + unsigned int requested_freq = policy->cur; + dbs_info->down_skip = 0; /* if we are already at full speed then break out early */ - if (dbs_info->requested_freq == policy->max) + if (requested_freq == policy->max) goto out; - dbs_info->requested_freq += get_freq_target(cs_tuners, policy); - - if (dbs_info->requested_freq > policy->max) - dbs_info->requested_freq = policy->max; + requested_freq += get_freq_target(cs_tuners, policy); - __cpufreq_driver_target(policy, dbs_info->requested_freq, - CPUFREQ_RELATION_H); + __cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_H); goto out; } @@ -98,34 +95,26 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) /* Check for frequency decrease */ if (load < cs_tuners->down_threshold) { - unsigned int freq_target; + unsigned int freq_target, requested_freq = policy->cur; /* * if we cannot reduce the frequency anymore, break out early */ - if (policy->cur == policy->min) + if (requested_freq == policy->min) goto out; freq_target = get_freq_target(cs_tuners, policy); - if (dbs_info->requested_freq > freq_target) - dbs_info->requested_freq -= freq_target; + if (requested_freq > freq_target) + requested_freq -= freq_target; else - dbs_info->requested_freq = policy->min; + requested_freq = policy->min; - __cpufreq_driver_target(policy, dbs_info->requested_freq, - CPUFREQ_RELATION_L); + __cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_L); } out: return dbs_data->sampling_rate; } -static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data); - -static struct notifier_block cs_cpufreq_notifier_block = { - .notifier_call = dbs_cpufreq_notifier, -}; - /************************** sysfs interface ************************/ static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, @@ -254,13 +243,6 @@ static struct attribute *cs_attributes[] = { /************************** sysfs end ************************/ -struct cs_governor { - struct dbs_governor dbs_gov; - unsigned int usage_count; -}; - -static struct cs_governor cs_gov; - static struct policy_dbs_info *cs_alloc(void) { struct cs_policy_dbs_info *dbs_info; @@ -292,25 +274,11 @@ static int cs_init(struct dbs_data *dbs_data) dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); - /* - * This function and cs_exit() are only called under gov_dbs_data_mutex - * which is global, so the cs_gov.usage_count accesses are guaranteed - * to be serialized. - */ - if (!cs_gov.usage_count++) - cpufreq_register_notifier(&cs_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - return 0; } static void cs_exit(struct dbs_data *dbs_data) { - /* Protected by gov_dbs_data_mutex - see the comment in cs_init(). */ - if (!--cs_gov.usage_count) - cpufreq_unregister_notifier(&cs_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - kfree(dbs_data->tuners); } @@ -319,49 +287,20 @@ static void cs_start(struct cpufreq_policy *policy) struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); dbs_info->down_skip = 0; - dbs_info->requested_freq = policy->cur; } -static struct cs_governor cs_gov = { - .dbs_gov = { - .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), - .kobj_type = { .default_attrs = cs_attributes }, - .gov_dbs_timer = cs_dbs_timer, - .alloc = cs_alloc, - .free = cs_free, - .init = cs_init, - .exit = cs_exit, - .start = cs_start, - }, +static struct dbs_governor cs_governor = { + .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), + .kobj_type = { .default_attrs = cs_attributes }, + .gov_dbs_timer = cs_dbs_timer, + .alloc = cs_alloc, + .free = cs_free, + .init = cs_init, + .exit = cs_exit, + .start = cs_start, }; -#define CPU_FREQ_GOV_CONSERVATIVE (&cs_gov.dbs_gov.gov) - -static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); - struct cs_policy_dbs_info *dbs_info; - - if (!policy) - return 0; - - /* policy isn't governed by conservative governor */ - if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE) - return 0; - - dbs_info = to_dbs_info(policy->governor_data); - /* - * we only care if our internally tracked freq moves outside the 'valid' - * ranges of frequency available to us otherwise we do not change it - */ - if (dbs_info->requested_freq > policy->max - || dbs_info->requested_freq < policy->min) - dbs_info->requested_freq = freq->new; - - return 0; -} +#define CPU_FREQ_GOV_CONSERVATIVE (&cs_governor.gov) static int __init cpufreq_gov_dbs_init(void) { From 41bad47f76481d8f55a555274062707e41e57e0e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 9 Jun 2016 15:34:41 -0700 Subject: [PATCH 23/41] cpufreq: intel_pstate: Broxton support Add Broxton CPU model number. Broxton requires core_params to get performance limits via MSRs, but it is an Atom platform, which requires more power optimized algorithm. So the P state selection will use similar algorithm as other Atom platforms. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e1b7a5e288f992..8a0e40591f1f8d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1110,6 +1110,26 @@ static struct cpu_defaults knl_params = { }, }; +static struct cpu_defaults bxt_params = { + .pid_policy = { + .sample_rate_ms = 10, + .deadband = 0, + .setpoint = 60, + .p_gain_pct = 14, + .d_gain_pct = 0, + .i_gain_pct = 4, + }, + .funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = core_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .get_target_pstate = get_target_pstate_use_cpu_load, + }, +}; + static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) { int max_perf = cpu->pstate.turbo_pstate; @@ -1370,6 +1390,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_params), ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_params), + ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_params), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); From a0944d904f951466047cb7ae625fc17927084e59 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 7 Jun 2016 12:30:13 +0100 Subject: [PATCH 24/41] cpufreq: mvebu: fix integer to pointer cast Fix the use of 0 instead of NULL to clk_get() call. This stops the following warning: drivers/cpufreq/mvebu-cpufreq.c:73:40: warning: Using plain integer as NULL pointer Signed-off-by: Ben Dooks Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/mvebu-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/mvebu-cpufreq.c b/drivers/cpufreq/mvebu-cpufreq.c index e920889b9ac29f..ed915ee85dd994 100644 --- a/drivers/cpufreq/mvebu-cpufreq.c +++ b/drivers/cpufreq/mvebu-cpufreq.c @@ -70,7 +70,7 @@ static int __init armada_xp_pmsu_cpufreq_init(void) continue; } - clk = clk_get(cpu_dev, 0); + clk = clk_get(cpu_dev, NULL); if (IS_ERR(clk)) { pr_err("Cannot get clock for CPU %d\n", cpu); return PTR_ERR(clk); From eed436095e62351c9f6d6b3ac6cdd18e89a852d8 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 27 Jun 2016 18:07:16 +0800 Subject: [PATCH 25/41] intel_pstate: Fix incorrect placement of __initdata __initdata should be placed between the variable name and equal sign (if there is) for the variable to be placed in the intended section. Signed-off-by: Jisheng Zhang Acked-by: Viresh Kumar Acked-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 10393fb4a95594..6390d524a211b4 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1596,9 +1596,9 @@ static struct cpufreq_driver intel_pstate_driver = { .name = "intel_pstate", }; -static int __initdata no_load; -static int __initdata no_hwp; -static int __initdata hwp_only; +static int no_load __initdata; +static int no_hwp __initdata; +static int hwp_only __initdata; static unsigned int force_load; static int intel_pstate_msrs_not_valid(void) From 29327c84baebec3549b9775dc8de3d26343720cb Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 27 Jun 2016 18:07:17 +0800 Subject: [PATCH 26/41] intel_pstate: add __init/__initdata marker to some functions/variables These functions/variables are not needed after booting, so mark them as __init or __initdata. Signed-off-by: Jisheng Zhang Acked-by: Viresh Kumar Acked-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6390d524a211b4..fc71f92cf0167b 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1377,7 +1377,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); -static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = { +static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), {} }; @@ -1599,9 +1599,9 @@ static struct cpufreq_driver intel_pstate_driver = { static int no_load __initdata; static int no_hwp __initdata; static int hwp_only __initdata; -static unsigned int force_load; +static unsigned int force_load __initdata; -static int intel_pstate_msrs_not_valid(void) +static int __init intel_pstate_msrs_not_valid(void) { if (!pstate_funcs.get_max() || !pstate_funcs.get_min() || @@ -1611,7 +1611,7 @@ static int intel_pstate_msrs_not_valid(void) return 0; } -static void copy_pid_params(struct pstate_adjust_policy *policy) +static void __init copy_pid_params(struct pstate_adjust_policy *policy) { pid_params.sample_rate_ms = policy->sample_rate_ms; pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; @@ -1622,7 +1622,7 @@ static void copy_pid_params(struct pstate_adjust_policy *policy) pid_params.setpoint = policy->setpoint; } -static void copy_cpu_funcs(struct pstate_funcs *funcs) +static void __init copy_cpu_funcs(struct pstate_funcs *funcs) { pstate_funcs.get_max = funcs->get_max; pstate_funcs.get_max_physical = funcs->get_max_physical; @@ -1637,7 +1637,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) #ifdef CONFIG_ACPI -static bool intel_pstate_no_acpi_pss(void) +static bool __init intel_pstate_no_acpi_pss(void) { int i; @@ -1666,7 +1666,7 @@ static bool intel_pstate_no_acpi_pss(void) return true; } -static bool intel_pstate_has_acpi_ppc(void) +static bool __init intel_pstate_has_acpi_ppc(void) { int i; @@ -1694,7 +1694,7 @@ struct hw_vendor_info { }; /* Hardware vendor-specific info that has its own power management modes */ -static struct hw_vendor_info vendor_info[] = { +static struct hw_vendor_info vendor_info[] __initdata = { {1, "HP ", "ProLiant", PSS}, {1, "ORACLE", "X4-2 ", PPC}, {1, "ORACLE", "X4-2L ", PPC}, @@ -1713,7 +1713,7 @@ static struct hw_vendor_info vendor_info[] = { {0, "", ""}, }; -static bool intel_pstate_platform_pwr_mgmt_exists(void) +static bool __init intel_pstate_platform_pwr_mgmt_exists(void) { struct acpi_table_header hdr; struct hw_vendor_info *v_info; From 4a7cb7a96aa32e34777b263fbf372543fb85ddea Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 27 Jun 2016 18:07:18 +0800 Subject: [PATCH 27/41] intel_pstate: Declare pid_params/pstate_funcs/hwp_active __read_mostly pid_params is written once by copy_pid_params() during initialization, and thereafter is mostly read by hot path intel_pstate_update_util(). The read of pid_params gets more after commit a4675fbc4a7a ("cpufreq: intel_pstate: Replace timers with utilization update callbacks") pstate_funcs is written once by copy_cpu_funcs() during initialization, and thereafter is mostly read by hot path intel_pstate_update_util() hwp_active is written to once during initialization and thereafter is mostly read by hot path intel_pstate_update_util(). The fact that they are mostly read and not written to makes them candidates for __read_mostly declarations. Signed-off-by: Jisheng Zhang Acked-by: Viresh Kumar Acked-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index fc71f92cf0167b..2ed7955f269894 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -282,9 +282,9 @@ struct cpu_defaults { static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu); static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu); -static struct pstate_adjust_policy pid_params; -static struct pstate_funcs pstate_funcs; -static int hwp_active; +static struct pstate_adjust_policy pid_params __read_mostly; +static struct pstate_funcs pstate_funcs __read_mostly; +static int hwp_active __read_mostly; #ifdef CONFIG_ACPI static bool acpi_ppc; From 8d540ea79211dd272403ed79b2033bbded19ac42 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 28 Jun 2016 03:31:02 +0200 Subject: [PATCH 28/41] cpufreq: Drop redundant check from cpufreq_update_current_freq() Both callers of cpufreq_update_current_freq(), cpufreq_update_policy() and cpufreq_start_governor(), check cpufreq_suspended before calling that function, so drop the redundant cpufreq_suspended check from it. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5918e954bb5dd2..118b4f30a4060e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1544,9 +1544,6 @@ static unsigned int cpufreq_update_current_freq(struct cpufreq_policy *policy) { unsigned int new_freq; - if (cpufreq_suspended) - return 0; - new_freq = cpufreq_driver->get(policy->cpu); if (!new_freq) return 0; From da0c6dc00c69d0adaae99c3b3d2ea0c869a3fb35 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 27 Jun 2016 16:04:07 +0530 Subject: [PATCH 29/41] cpufreq: Handle sorted frequency tables more efficiently cpufreq drivers aren't required to provide a sorted frequency table today, and even the ones which provide a sorted table aren't handled efficiently by cpufreq core. This patch adds infrastructure to verify if the freq-table provided by the drivers is sorted or not, and use efficient helpers if they are sorted. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/freq_table.c | 73 +++++++++-- include/linux/cpufreq.h | 234 ++++++++++++++++++++++++++++++++++- 2 files changed, 296 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index eac8bcbdaad1b1..3bbbf9e6960cdf 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -113,9 +113,9 @@ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); -int cpufreq_frequency_table_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) +int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) { struct cpufreq_frequency_table optimal = { .driver_data = ~0, @@ -205,7 +205,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, table[index].frequency); return index; } -EXPORT_SYMBOL_GPL(cpufreq_frequency_table_target); +EXPORT_SYMBOL_GPL(cpufreq_table_index_unsorted); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq) @@ -297,15 +297,72 @@ struct freq_attr *cpufreq_generic_attr[] = { }; EXPORT_SYMBOL_GPL(cpufreq_generic_attr); +static int set_freq_table_sorted(struct cpufreq_policy *policy) +{ + struct cpufreq_frequency_table *pos, *table = policy->freq_table; + struct cpufreq_frequency_table *prev = NULL; + int ascending = 0; + + policy->freq_table_sorted = CPUFREQ_TABLE_UNSORTED; + + cpufreq_for_each_valid_entry(pos, table) { + if (!prev) { + prev = pos; + continue; + } + + if (pos->frequency == prev->frequency) { + pr_warn("Duplicate freq-table entries: %u\n", + pos->frequency); + return -EINVAL; + } + + /* Frequency increased from prev to pos */ + if (pos->frequency > prev->frequency) { + /* But frequency was decreasing earlier */ + if (ascending < 0) { + pr_debug("Freq table is unsorted\n"); + return 0; + } + + ascending++; + } else { + /* Frequency decreased from prev to pos */ + + /* But frequency was increasing earlier */ + if (ascending > 0) { + pr_debug("Freq table is unsorted\n"); + return 0; + } + + ascending--; + } + + prev = pos; + } + + if (ascending > 0) + policy->freq_table_sorted = CPUFREQ_TABLE_SORTED_ASCENDING; + else + policy->freq_table_sorted = CPUFREQ_TABLE_SORTED_DESCENDING; + + pr_debug("Freq table is sorted in %s order\n", + ascending > 0 ? "ascending" : "descending"); + + return 0; +} + int cpufreq_table_validate_and_show(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table) { - int ret = cpufreq_frequency_table_cpuinfo(policy, table); + int ret; - if (!ret) - policy->freq_table = table; + ret = cpufreq_frequency_table_cpuinfo(policy, table); + if (ret) + return ret; - return ret; + policy->freq_table = table; + return set_freq_table_sorted(policy); } EXPORT_SYMBOL_GPL(cpufreq_table_validate_and_show); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c378776628b4b5..c6410b1b24903d 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -36,6 +36,12 @@ struct cpufreq_governor; +enum cpufreq_table_sorting { + CPUFREQ_TABLE_UNSORTED, + CPUFREQ_TABLE_SORTED_ASCENDING, + CPUFREQ_TABLE_SORTED_DESCENDING +}; + struct cpufreq_freqs { unsigned int cpu; /* cpu nr */ unsigned int old; @@ -87,6 +93,7 @@ struct cpufreq_policy { struct cpufreq_user_policy user_policy; struct cpufreq_frequency_table *freq_table; + enum cpufreq_table_sorting freq_table_sorted; struct list_head policy_list; struct kobject kobj; @@ -597,9 +604,9 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy); -int cpufreq_frequency_table_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation); +int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq); @@ -610,6 +617,227 @@ int cpufreq_boost_trigger_state(int state); int cpufreq_boost_enabled(void); int cpufreq_enable_boost_support(void); bool policy_has_boost_freq(struct cpufreq_policy *policy); + +/* Find lowest freq at or above target in a table in ascending order */ +static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + unsigned int freq; + int i, best = -1; + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + freq = table[i].frequency; + + if (freq >= target_freq) + return i; + + best = i; + } + + return best; +} + +/* Find lowest freq at or above target in a table in descending order */ +static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + unsigned int freq; + int i, best = -1; + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + freq = table[i].frequency; + + if (freq == target_freq) + return i; + + if (freq > target_freq) { + best = i; + continue; + } + + /* No freq found above target_freq */ + if (best == -1) + return i; + + return best; + } + + return best; +} + +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + + if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) + return cpufreq_table_find_index_al(policy, target_freq); + else + return cpufreq_table_find_index_dl(policy, target_freq); +} + +/* Find highest freq at or below target in a table in ascending order */ +static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + unsigned int freq; + int i, best = -1; + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + freq = table[i].frequency; + + if (freq == target_freq) + return i; + + if (freq < target_freq) { + best = i; + continue; + } + + /* No freq found below target_freq */ + if (best == -1) + return i; + + return best; + } + + return best; +} + +/* Find highest freq at or below target in a table in descending order */ +static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + unsigned int freq; + int i, best = -1; + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + freq = table[i].frequency; + + if (freq <= target_freq) + return i; + + best = i; + } + + return best; +} + +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + + if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) + return cpufreq_table_find_index_ah(policy, target_freq); + else + return cpufreq_table_find_index_dh(policy, target_freq); +} + +/* Find closest freq to target in a table in ascending order */ +static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + unsigned int freq; + int i, best = -1; + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + freq = table[i].frequency; + + if (freq == target_freq) + return i; + + if (freq < target_freq) { + best = i; + continue; + } + + /* No freq found below target_freq */ + if (best == -1) + return i; + + /* Choose the closest freq */ + if (target_freq - table[best].frequency > freq - target_freq) + return i; + + return best; + } + + return best; +} + +/* Find closest freq to target in a table in descending order */ +static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + unsigned int freq; + int i, best = -1; + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + freq = table[i].frequency; + + if (freq == target_freq) + return i; + + if (freq > target_freq) { + best = i; + continue; + } + + /* No freq found above target_freq */ + if (best == -1) + return i; + + /* Choose the closest freq */ + if (table[best].frequency - target_freq > target_freq - freq) + return i; + + return best; + } + + return best; +} + +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + + if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) + return cpufreq_table_find_index_ac(policy, target_freq); + else + return cpufreq_table_find_index_dc(policy, target_freq); +} + +static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)) + return cpufreq_table_index_unsorted(policy, target_freq, + relation); + + switch (relation) { + case CPUFREQ_RELATION_L: + return cpufreq_table_find_index_l(policy, target_freq); + case CPUFREQ_RELATION_H: + return cpufreq_table_find_index_h(policy, target_freq); + case CPUFREQ_RELATION_C: + return cpufreq_table_find_index_c(policy, target_freq); + default: + pr_err("%s: Invalid relation: %d\n", __func__, relation); + return -EINVAL; + } +} #else static inline int cpufreq_boost_trigger_state(int state) { From 825773609c8ae34a19d4b593555493ae2754eff7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 27 Jun 2016 09:59:34 +0530 Subject: [PATCH 30/41] cpufreq: Reuse new freq-table helpers This patch migrates few users of cpufreq tables to the new helpers that work on sorted freq-tables. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 14 ++++---------- drivers/cpufreq/amd_freq_sensitivity.c | 4 ++-- drivers/cpufreq/cpufreq_ondemand.c | 6 ++---- drivers/cpufreq/powernv-cpufreq.c | 3 +-- drivers/cpufreq/s5pv210-cpufreq.c | 3 +-- 5 files changed, 10 insertions(+), 20 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 32a15052f363f8..11c9a078e0fdd1 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -468,20 +468,14 @@ unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy, struct acpi_cpufreq_data *data = policy->driver_data; struct acpi_processor_performance *perf; struct cpufreq_frequency_table *entry; - unsigned int next_perf_state, next_freq, freq; + unsigned int next_perf_state, next_freq, index; /* * Find the closest frequency above target_freq. - * - * The table is sorted in the reverse order with respect to the - * frequency and all of the entries are valid (see the initialization). */ - entry = policy->freq_table; - do { - entry++; - freq = entry->frequency; - } while (freq >= target_freq && freq != CPUFREQ_TABLE_END); - entry--; + index = cpufreq_table_find_index_dl(policy, target_freq); + + entry = &policy->freq_table[index]; next_freq = entry->frequency; next_perf_state = entry->driver_data; diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 6d5dc04c3a3762..042023bbbf6214 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -91,8 +91,8 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, else { unsigned int index; - index = cpufreq_frequency_table_target(policy, - policy->cur - 1, CPUFREQ_RELATION_H); + index = cpufreq_table_find_index_h(policy, + policy->cur - 1); freq_next = policy->freq_table[index].frequency; } diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 0c93cd9dee996f..3a1f49f5f4c68a 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -85,11 +85,9 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, freq_avg = freq_req - freq_reduc; /* Find freq bounds for freq_avg in freq_table */ - index = cpufreq_frequency_table_target(policy, freq_avg, - CPUFREQ_RELATION_H); + index = cpufreq_table_find_index_h(policy, freq_avg); freq_lo = freq_table[index].frequency; - index = cpufreq_frequency_table_target(policy, freq_avg, - CPUFREQ_RELATION_L); + index = cpufreq_table_find_index_l(policy, freq_avg); freq_hi = freq_table[index].frequency; /* Find out how long we have to be in hi and lo freqs */ diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index b29c5c20c3a1c5..2a2920c4fdf9ca 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -760,8 +760,7 @@ void powernv_cpufreq_work_fn(struct work_struct *work) struct cpufreq_policy policy; cpufreq_get_policy(&policy, cpu); - index = cpufreq_frequency_table_target(&policy, policy.cur, - CPUFREQ_RELATION_C); + index = cpufreq_table_find_index_c(&policy, policy.cur); powernv_cpufreq_target_index(&policy, index); cpumask_andnot(&mask, &mask, policy.cpus); } diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 4f4e9df9b7fc99..9e07588ea9f580 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -246,8 +246,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index) new_freq = s5pv210_freq_table[index].frequency; /* Finding current running level index */ - priv_index = cpufreq_frequency_table_target(policy, old_freq, - CPUFREQ_RELATION_H); + priv_index = cpufreq_table_find_index_h(policy, old_freq); arm_volt = dvs_conf[index].arm_volt; int_volt = dvs_conf[index].int_volt; From 5fc8f707a2aa40c767c3a338738b9b6fcd151ac1 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 8 Jul 2016 20:42:04 +0200 Subject: [PATCH 31/41] intel_pstate: Fix MSR_CONFIG_TDP_x addressing in core_get_max_pstate() If MSR_CONFIG_TDP_CONTROL is locked, we currently try to address some MSR 0x80000648 or so. Mask out the relevant level bits 0 and 1. Found while running over the Jailhouse hypervisor which became upset about this strange MSR index. Signed-off-by: Jan Kiszka Acked-by: Srinivas Pandruvada Cc: 4.4+ # 4.4+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f6dd07275862fa..25bfdec4e74ee1 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -945,7 +945,7 @@ static int core_get_max_pstate(void) if (err) goto skip_tar; - tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl; + tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3); err = rdmsrl_safe(tdp_msr, &tdp_ratio); if (err) goto skip_tar; From 09ca4c9b5958403bf05482b365a67d88f10463e4 Mon Sep 17 00:00:00 2001 From: Akshay Adiga Date: Thu, 30 Jun 2016 11:53:07 +0530 Subject: [PATCH 32/41] cpufreq: powernv: Replacing pstate_id with frequency table index Refactoring code to use frequency table index instead of pstate_id. This abstraction will make the code independent of the pstate values. - No functional changes - The highest frequency is at frequency table index 0 and the frequency decreases as the index increases. - Macros pstates_to_idx() and idx_to_pstate() can be used for conversion between pstate_id and index. - powernv_pstate_info now contains frequency table index to min, max and nominal frequency (instead of pstate_ids) - global_pstate_info new stores index values instead pstate ids. - variables renamed as *_idx which now store index instead of pstate Signed-off-by: Akshay Adiga Reviewed-by: Gautham R. Shenoy Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 177 +++++++++++++++++------------- 1 file changed, 102 insertions(+), 75 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 2a2920c4fdf9ca..0f138b050e9a3b 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -64,12 +64,14 @@ /** * struct global_pstate_info - Per policy data structure to maintain history of * global pstates - * @highest_lpstate: The local pstate from which we are ramping down + * @highest_lpstate_idx: The local pstate index from which we are + * ramping down * @elapsed_time: Time in ms spent in ramping down from - * highest_lpstate + * highest_lpstate_idx * @last_sampled_time: Time from boot in ms when global pstates were * last set - * @last_lpstate,last_gpstate: Last set values for local and global pstates + * @last_lpstate_idx, Last set value of local pstate and global + * last_gpstate_idx pstate in terms of cpufreq table index * @timer: Is used for ramping down if cpu goes idle for * a long time with global pstate held high * @gpstate_lock: A spinlock to maintain synchronization between @@ -77,11 +79,11 @@ * governer's target_index calls */ struct global_pstate_info { - int highest_lpstate; + int highest_lpstate_idx; unsigned int elapsed_time; unsigned int last_sampled_time; - int last_lpstate; - int last_gpstate; + int last_lpstate_idx; + int last_gpstate_idx; spinlock_t gpstate_lock; struct timer_list timer; }; @@ -124,29 +126,47 @@ static int nr_chips; static DEFINE_PER_CPU(struct chip *, chip_info); /* - * Note: The set of pstates consists of contiguous integers, the - * smallest of which is indicated by powernv_pstate_info.min, the - * largest of which is indicated by powernv_pstate_info.max. + * Note: + * The set of pstates consists of contiguous integers. + * powernv_pstate_info stores the index of the frequency table for + * max, min and nominal frequencies. It also stores number of + * available frequencies. * - * The nominal pstate is the highest non-turbo pstate in this - * platform. This is indicated by powernv_pstate_info.nominal. + * powernv_pstate_info.nominal indicates the index to the highest + * non-turbo frequency. */ static struct powernv_pstate_info { - int min; - int max; - int nominal; - int nr_pstates; + unsigned int min; + unsigned int max; + unsigned int nominal; + unsigned int nr_pstates; } powernv_pstate_info; +/* Use following macros for conversions between pstate_id and index */ +static inline int idx_to_pstate(unsigned int i) +{ + return powernv_freqs[i].driver_data; +} + +static inline unsigned int pstate_to_idx(int pstate) +{ + /* + * abs() is deliberately used so that is works with + * both monotonically increasing and decreasing + * pstate values + */ + return abs(pstate - idx_to_pstate(powernv_pstate_info.max)); +} + static inline void reset_gpstates(struct cpufreq_policy *policy) { struct global_pstate_info *gpstates = policy->driver_data; - gpstates->highest_lpstate = 0; + gpstates->highest_lpstate_idx = 0; gpstates->elapsed_time = 0; gpstates->last_sampled_time = 0; - gpstates->last_lpstate = 0; - gpstates->last_gpstate = 0; + gpstates->last_lpstate_idx = 0; + gpstates->last_gpstate_idx = 0; } /* @@ -156,9 +176,10 @@ static inline void reset_gpstates(struct cpufreq_policy *policy) static int init_powernv_pstates(void) { struct device_node *power_mgt; - int i, pstate_min, pstate_max, pstate_nominal, nr_pstates = 0; + int i, nr_pstates = 0; const __be32 *pstate_ids, *pstate_freqs; u32 len_ids, len_freqs; + u32 pstate_min, pstate_max, pstate_nominal; power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); if (!power_mgt) { @@ -208,6 +229,7 @@ static int init_powernv_pstates(void) return -ENODEV; } + powernv_pstate_info.nr_pstates = nr_pstates; pr_debug("NR PStates %d\n", nr_pstates); for (i = 0; i < nr_pstates; i++) { u32 id = be32_to_cpu(pstate_ids[i]); @@ -216,15 +238,17 @@ static int init_powernv_pstates(void) pr_debug("PState id %d freq %d MHz\n", id, freq); powernv_freqs[i].frequency = freq * 1000; /* kHz */ powernv_freqs[i].driver_data = id; + + if (id == pstate_max) + powernv_pstate_info.max = i; + else if (id == pstate_nominal) + powernv_pstate_info.nominal = i; + else if (id == pstate_min) + powernv_pstate_info.min = i; } + /* End of list marker entry */ powernv_freqs[i].frequency = CPUFREQ_TABLE_END; - - powernv_pstate_info.min = pstate_min; - powernv_pstate_info.max = pstate_max; - powernv_pstate_info.nominal = pstate_nominal; - powernv_pstate_info.nr_pstates = nr_pstates; - return 0; } @@ -233,12 +257,12 @@ static unsigned int pstate_id_to_freq(int pstate_id) { int i; - i = powernv_pstate_info.max - pstate_id; + i = pstate_to_idx(pstate_id); if (i >= powernv_pstate_info.nr_pstates || i < 0) { pr_warn("PState id %d outside of PState table, " "reporting nominal id %d instead\n", - pstate_id, powernv_pstate_info.nominal); - i = powernv_pstate_info.max - powernv_pstate_info.nominal; + pstate_id, idx_to_pstate(powernv_pstate_info.nominal)); + i = powernv_pstate_info.nominal; } return powernv_freqs[i].frequency; @@ -252,7 +276,7 @@ static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy, char *buf) { return sprintf(buf, "%u\n", - pstate_id_to_freq(powernv_pstate_info.nominal)); + powernv_freqs[powernv_pstate_info.nominal].frequency); } struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq = @@ -426,7 +450,7 @@ static void set_pstate(void *data) */ static inline unsigned int get_nominal_index(void) { - return powernv_pstate_info.max - powernv_pstate_info.nominal; + return powernv_pstate_info.nominal; } static void powernv_cpufreq_throttle_check(void *data) @@ -435,20 +459,22 @@ static void powernv_cpufreq_throttle_check(void *data) unsigned int cpu = smp_processor_id(); unsigned long pmsr; int pmsr_pmax; + unsigned int pmsr_pmax_idx; pmsr = get_pmspr(SPRN_PMSR); chip = this_cpu_read(chip_info); /* Check for Pmax Capping */ pmsr_pmax = (s8)PMSR_MAX(pmsr); - if (pmsr_pmax != powernv_pstate_info.max) { + pmsr_pmax_idx = pstate_to_idx(pmsr_pmax); + if (pmsr_pmax_idx != powernv_pstate_info.max) { if (chip->throttled) goto next; chip->throttled = true; - if (pmsr_pmax < powernv_pstate_info.nominal) { - pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", + if (pmsr_pmax_idx > powernv_pstate_info.nominal) { + pr_warn_once("CPU %d on Chip %u has Pmax(%d) reduced below nominal frequency(%d)\n", cpu, chip->id, pmsr_pmax, - powernv_pstate_info.nominal); + idx_to_pstate(powernv_pstate_info.nominal)); chip->throttle_sub_turbo++; } else { chip->throttle_turbo++; @@ -484,34 +510,35 @@ static void powernv_cpufreq_throttle_check(void *data) /** * calc_global_pstate - Calculate global pstate - * @elapsed_time: Elapsed time in milliseconds - * @local_pstate: New local pstate - * @highest_lpstate: pstate from which its ramping down + * @elapsed_time: Elapsed time in milliseconds + * @local_pstate_idx: New local pstate + * @highest_lpstate_idx: pstate from which its ramping down * * Finds the appropriate global pstate based on the pstate from which its * ramping down and the time elapsed in ramping down. It follows a quadratic * equation which ensures that it reaches ramping down to pmin in 5sec. */ static inline int calc_global_pstate(unsigned int elapsed_time, - int highest_lpstate, int local_pstate) + int highest_lpstate_idx, + int local_pstate_idx) { - int pstate_diff; + int index_diff; /* * Using ramp_down_percent we get the percentage of rampdown * that we are expecting to be dropping. Difference between - * highest_lpstate and powernv_pstate_info.min will give a absolute + * highest_lpstate_idx and powernv_pstate_info.min will give a absolute * number of how many pstates we will drop eventually by the end of * 5 seconds, then just scale it get the number pstates to be dropped. */ - pstate_diff = ((int)ramp_down_percent(elapsed_time) * - (highest_lpstate - powernv_pstate_info.min)) / 100; + index_diff = ((int)ramp_down_percent(elapsed_time) * + (powernv_pstate_info.min - highest_lpstate_idx)) / 100; /* Ensure that global pstate is >= to local pstate */ - if (highest_lpstate - pstate_diff < local_pstate) - return local_pstate; + if (highest_lpstate_idx + index_diff >= local_pstate_idx) + return local_pstate_idx; else - return highest_lpstate - pstate_diff; + return highest_lpstate_idx + index_diff; } static inline void queue_gpstate_timer(struct global_pstate_info *gpstates) @@ -547,7 +574,7 @@ void gpstate_timer_handler(unsigned long data) { struct cpufreq_policy *policy = (struct cpufreq_policy *)data; struct global_pstate_info *gpstates = policy->driver_data; - int gpstate_id; + int gpstate_idx; unsigned int time_diff = jiffies_to_msecs(jiffies) - gpstates->last_sampled_time; struct powernv_smp_call_data freq_data; @@ -557,29 +584,29 @@ void gpstate_timer_handler(unsigned long data) gpstates->last_sampled_time += time_diff; gpstates->elapsed_time += time_diff; - freq_data.pstate_id = gpstates->last_lpstate; + freq_data.pstate_id = idx_to_pstate(gpstates->last_lpstate_idx); - if ((gpstates->last_gpstate == freq_data.pstate_id) || + if ((gpstates->last_gpstate_idx == gpstates->last_lpstate_idx) || (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME)) { - gpstate_id = freq_data.pstate_id; + gpstate_idx = pstate_to_idx(freq_data.pstate_id); reset_gpstates(policy); - gpstates->highest_lpstate = freq_data.pstate_id; + gpstates->highest_lpstate_idx = gpstate_idx; } else { - gpstate_id = calc_global_pstate(gpstates->elapsed_time, - gpstates->highest_lpstate, - freq_data.pstate_id); + gpstate_idx = calc_global_pstate(gpstates->elapsed_time, + gpstates->highest_lpstate_idx, + freq_data.pstate_id); } /* * If local pstate is equal to global pstate, rampdown is over * So timer is not required to be queued. */ - if (gpstate_id != freq_data.pstate_id) + if (gpstate_idx != gpstates->last_lpstate_idx) queue_gpstate_timer(gpstates); - freq_data.gpstate_id = gpstate_id; - gpstates->last_gpstate = freq_data.gpstate_id; - gpstates->last_lpstate = freq_data.pstate_id; + freq_data.gpstate_id = idx_to_pstate(gpstate_idx); + gpstates->last_gpstate_idx = pstate_to_idx(freq_data.gpstate_id); + gpstates->last_lpstate_idx = pstate_to_idx(freq_data.pstate_id); spin_unlock(&gpstates->gpstate_lock); @@ -596,7 +623,7 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, unsigned int new_index) { struct powernv_smp_call_data freq_data; - unsigned int cur_msec, gpstate_id; + unsigned int cur_msec, gpstate_idx; struct global_pstate_info *gpstates = policy->driver_data; if (unlikely(rebooting) && new_index != get_nominal_index()) @@ -608,15 +635,15 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, cur_msec = jiffies_to_msecs(get_jiffies_64()); spin_lock(&gpstates->gpstate_lock); - freq_data.pstate_id = powernv_freqs[new_index].driver_data; + freq_data.pstate_id = idx_to_pstate(new_index); if (!gpstates->last_sampled_time) { - gpstate_id = freq_data.pstate_id; - gpstates->highest_lpstate = freq_data.pstate_id; + gpstate_idx = new_index; + gpstates->highest_lpstate_idx = new_index; goto gpstates_done; } - if (gpstates->last_gpstate > freq_data.pstate_id) { + if (gpstates->last_gpstate_idx < new_index) { gpstates->elapsed_time += cur_msec - gpstates->last_sampled_time; @@ -627,34 +654,34 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, */ if (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME) { reset_gpstates(policy); - gpstates->highest_lpstate = freq_data.pstate_id; - gpstate_id = freq_data.pstate_id; + gpstates->highest_lpstate_idx = new_index; + gpstate_idx = new_index; } else { /* Elaspsed_time is less than 5 seconds, continue to rampdown */ - gpstate_id = calc_global_pstate(gpstates->elapsed_time, - gpstates->highest_lpstate, - freq_data.pstate_id); + gpstate_idx = calc_global_pstate(gpstates->elapsed_time, + gpstates->highest_lpstate_idx, + new_index); } } else { reset_gpstates(policy); - gpstates->highest_lpstate = freq_data.pstate_id; - gpstate_id = freq_data.pstate_id; + gpstates->highest_lpstate_idx = new_index; + gpstate_idx = new_index; } /* * If local pstate is equal to global pstate, rampdown is over * So timer is not required to be queued. */ - if (gpstate_id != freq_data.pstate_id) + if (gpstate_idx != new_index) queue_gpstate_timer(gpstates); else del_timer_sync(&gpstates->timer); gpstates_done: - freq_data.gpstate_id = gpstate_id; + freq_data.gpstate_id = idx_to_pstate(gpstate_idx); gpstates->last_sampled_time = cur_msec; - gpstates->last_gpstate = freq_data.gpstate_id; - gpstates->last_lpstate = freq_data.pstate_id; + gpstates->last_gpstate_idx = gpstate_idx; + gpstates->last_lpstate_idx = new_index; spin_unlock(&gpstates->gpstate_lock); @@ -846,8 +873,8 @@ static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy) struct powernv_smp_call_data freq_data; struct global_pstate_info *gpstates = policy->driver_data; - freq_data.pstate_id = powernv_pstate_info.min; - freq_data.gpstate_id = powernv_pstate_info.min; + freq_data.pstate_id = idx_to_pstate(powernv_pstate_info.min); + freq_data.gpstate_id = idx_to_pstate(powernv_pstate_info.min); smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1); del_timer_sync(&gpstates->timer); } From 2630abc243f8d014767c4eec1deb946bdbbcfc81 Mon Sep 17 00:00:00 2001 From: Carsten Emde Date: Tue, 19 Jul 2016 01:19:15 +0200 Subject: [PATCH 33/41] cpufreq: intel_pstate: clean remnant struct element When I was working with the Intel P state driver I came across a remnant struct element that is no longer needed after the function intel_pstate_calc_freq() was retired. Signed-off-by: Carsten Emde Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 25bfdec4e74ee1..edb3b6981b73cf 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -97,7 +97,6 @@ static inline u64 div_ext_fp(u64 x, u64 y) * read from MPERF MSR between last and current sample * @tsc: Difference of time stamp counter between last and * current sample - * @freq: Effective frequency calculated from APERF/MPERF * @time: Current time from scheduler * * This structure is used in the cpudata structure to store performance sample @@ -109,7 +108,6 @@ struct sample { u64 aperf; u64 mperf; u64 tsc; - int freq; u64 time; }; From bc95a454b6dcf6aa464deef86e18c1770b8682b8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 19 Jul 2016 15:10:37 +0200 Subject: [PATCH 34/41] intel_pstate: Update cpu_frequency tracepoint every time Currently, intel_pstate only updates the cpu_frequency tracepoint if the new P-state to set is different from the current one, but that causes powertop to report 100% idle on an 100% loaded system sometimes. Prevent that from happening by updating the cpu_frequency tracepoint every time intel_pstate_update_pstate() is called. Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada - --- drivers/cpufreq/intel_pstate.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index edb3b6981b73cf..9c8a78f9ddce62 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1132,17 +1132,12 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } -static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate) -{ - trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); - cpu->pstate.current_pstate = pstate; -} - static void intel_pstate_set_min_pstate(struct cpudata *cpu) { int pstate = cpu->pstate.min_pstate; - intel_pstate_record_pstate(cpu, pstate); + trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); + cpu->pstate.current_pstate = pstate; /* * Generally, there is no guarantee that this code will always run on * the CPU being updated, so force the register update to run on the @@ -1302,10 +1297,11 @@ static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) intel_pstate_get_min_max(cpu, &min_perf, &max_perf); pstate = clamp_t(int, pstate, min_perf, max_perf); + trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); if (pstate == cpu->pstate.current_pstate) return; - intel_pstate_record_pstate(cpu, pstate); + cpu->pstate.current_pstate = pstate; wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); } From da7de91c3e237f3ace1aa29b82c69702dc0176c5 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 19 Jul 2016 16:52:01 -0700 Subject: [PATCH 35/41] cpufreq: intel_pstate: Check cpuid for MSR_HWP_INTERRUPT The MSR MSR_HWP_INTERRUPT is valid only when CPUID.06H:EAX[8] = 1, so check for feature before accessing this MSR. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9c8a78f9ddce62..d8028def199dc3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -806,7 +806,8 @@ static void __init intel_pstate_sysfs_expose_params(void) static void intel_pstate_hwp_enable(struct cpudata *cpudata) { /* First disable HWP notification interrupt as we don't process them */ - wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); + if (static_cpu_has(X86_FEATURE_HWP_NOTIFY)) + wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } From e3c06236087051d5c62d60d0668588c370fda887 Mon Sep 17 00:00:00 2001 From: Steve Muckle Date: Wed, 13 Jul 2016 13:25:25 -0700 Subject: [PATCH 36/41] cpufreq: add cpufreq_driver_resolve_freq() Cpufreq governors may need to know what a particular target frequency maps to in the driver without necessarily wanting to set the frequency. Support this operation via a new cpufreq API, cpufreq_driver_resolve_freq(). This API returns the lowest driver frequency equal or greater than the target frequency (CPUFREQ_RELATION_L), subject to any policy (min/max) or driver limitations. The mapping is also cached in the policy so that a subsequent fast_switch operation can avoid repeating the same lookup. The API will call a new cpufreq driver callback, resolve_freq(), if it has been registered by the driver. Otherwise the frequency is resolved via cpufreq_frequency_table_target(). Rather than require ->target() style drivers to provide a resolve_freq() callback it is left to the caller to ensure that the driver implements this callback if necessary to use cpufreq_driver_resolve_freq(). Suggested-by: Rafael J. Wysocki Signed-off-by: Steve Muckle Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 25 +++++++++++++++++++++++++ include/linux/cpufreq.h | 16 ++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 118b4f30a4060e..b696baeb249d84 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -492,6 +492,29 @@ void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); +/** + * cpufreq_driver_resolve_freq - Map a target frequency to a driver-supported + * one. + * @target_freq: target frequency to resolve. + * + * The target to driver frequency mapping is cached in the policy. + * + * Return: Lowest driver-supported frequency greater than or equal to the + * given target_freq, subject to policy (min/max) and driver limitations. + */ +unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + policy->cached_target_freq = target_freq; + if (cpufreq_driver->resolve_freq) + return cpufreq_driver->resolve_freq(policy, target_freq); + policy->cached_resolved_idx = + cpufreq_frequency_table_target(policy, target_freq, + CPUFREQ_RELATION_L); + return policy->freq_table[policy->cached_resolved_idx].frequency; +} + /********************************************************************* * SYSFS INTERFACE * *********************************************************************/ @@ -2199,6 +2222,8 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, policy->min = new_policy->min; policy->max = new_policy->max; + policy->cached_target_freq = UINT_MAX; + pr_debug("new min and max freqs are %u - %u kHz\n", policy->min, policy->max); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c6410b1b24903d..631ba33bbe9fdb 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -120,6 +120,10 @@ struct cpufreq_policy { bool fast_switch_possible; bool fast_switch_enabled; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ + unsigned int cached_target_freq; + int cached_resolved_idx; + /* Synchronization for frequency transitions */ bool transition_ongoing; /* Tracks transition status */ spinlock_t transition_lock; @@ -270,6 +274,16 @@ struct cpufreq_driver { unsigned int index); unsigned int (*fast_switch)(struct cpufreq_policy *policy, unsigned int target_freq); + + /* + * Caches and returns the lowest driver-supported frequency greater than + * or equal to the target frequency, subject to any driver limitations. + * Does not set the frequency. Only to be implemented for drivers with + * target(). + */ + unsigned int (*resolve_freq)(struct cpufreq_policy *policy, + unsigned int target_freq); + /* * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION * unset. @@ -501,6 +515,8 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation); +unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, + unsigned int target_freq); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); From 5cbea46984d67f614c74c4401b54b9d681861e80 Mon Sep 17 00:00:00 2001 From: Steve Muckle Date: Wed, 13 Jul 2016 13:25:26 -0700 Subject: [PATCH 37/41] cpufreq: schedutil: map raw required frequency to driver frequency The slow-path frequency transition path is relatively expensive as it requires waking up a thread to do work. Should support be added for remote CPU cpufreq updates that is also expensive since it requires an IPI. These activities should be avoided if they are not necessary. To that end, calculate the actual driver-supported frequency required by the new utilization value in schedutil by using the recently added cpufreq_driver_resolve_freq API. If it is the same as the previously requested driver frequency then there is no need to continue with the update assuming the cpu frequency limits have not changed. This will have additional benefits should the semantics of the rate limit be changed to apply solely to frequency transitions rather than to frequency calculations in schedutil. The last raw required frequency is cached. This allows the driver frequency lookup to be skipped in the event that the new raw required frequency matches the last one, assuming a frequency update has not been forced due to limits changing (indicated by a next_freq value of UINT_MAX, see sugov_should_update_freq). Signed-off-by: Steve Muckle Reviewed-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 758efd7f3abe9d..a84641b222c1b4 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -47,6 +47,8 @@ struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; + unsigned int cached_raw_freq; + /* The fields below are only needed when sharing a policy. */ unsigned long util; unsigned long max; @@ -106,7 +108,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, /** * get_next_freq - Compute a new frequency for a given cpufreq policy. - * @policy: cpufreq policy object to compute the new frequency for. + * @sg_cpu: schedutil cpu object to compute the new frequency for. * @util: Current CPU utilization. * @max: CPU capacity. * @@ -121,14 +123,25 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, * next_freq = C * curr_freq * util_raw / max * * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8. + * + * The lowest driver-supported frequency which is equal or greater than the raw + * next_freq (as calculated above) is returned, subject to policy min/max and + * cpufreq driver limitations. */ -static unsigned int get_next_freq(struct cpufreq_policy *policy, - unsigned long util, unsigned long max) +static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, + unsigned long max) { + struct sugov_policy *sg_policy = sg_cpu->sg_policy; + struct cpufreq_policy *policy = sg_policy->policy; unsigned int freq = arch_scale_freq_invariant() ? policy->cpuinfo.max_freq : policy->cur; - return (freq + (freq >> 2)) * util / max; + freq = (freq + (freq >> 2)) * util / max; + + if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) + return sg_policy->next_freq; + sg_cpu->cached_raw_freq = freq; + return cpufreq_driver_resolve_freq(policy, freq); } static void sugov_update_single(struct update_util_data *hook, u64 time, @@ -143,13 +156,14 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, return; next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq : - get_next_freq(policy, util, max); + get_next_freq(sg_cpu, util, max); sugov_update_commit(sg_policy, time, next_f); } -static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy, +static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, unsigned long util, unsigned long max) { + struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; unsigned int max_f = policy->cpuinfo.max_freq; u64 last_freq_update_time = sg_policy->last_freq_update_time; @@ -189,7 +203,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy, } } - return get_next_freq(policy, util, max); + return get_next_freq(sg_cpu, util, max); } static void sugov_update_shared(struct update_util_data *hook, u64 time, @@ -206,7 +220,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { - next_f = sugov_next_freq_shared(sg_policy, util, max); + next_f = sugov_next_freq_shared(sg_cpu, util, max); sugov_update_commit(sg_policy, time, next_f); } @@ -433,6 +447,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->util = ULONG_MAX; sg_cpu->max = 0; sg_cpu->last_update = 0; + sg_cpu->cached_raw_freq = 0; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, sugov_update_shared); } else { From 5b6667c76d39e325d3c05b1f7e40f8a0e4bd6681 Mon Sep 17 00:00:00 2001 From: Steve Muckle Date: Wed, 13 Jul 2016 13:25:27 -0700 Subject: [PATCH 38/41] cpufreq: acpi-cpufreq: use cached frequency mapping when possible A call to cpufreq_driver_resolve_freq will cache the mapping from the desired target frequency to the frequency table index. If there is a mapping for the desired target frequency then use it instead of looking up the mapping again. Signed-off-by: Steve Muckle Reviewed-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 11c9a078e0fdd1..297e9128fe9fe1 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -473,7 +473,10 @@ unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy, /* * Find the closest frequency above target_freq. */ - index = cpufreq_table_find_index_dl(policy, target_freq); + if (policy->cached_target_freq == target_freq) + index = policy->cached_resolved_idx; + else + index = cpufreq_table_find_index_dl(policy, target_freq); entry = &policy->freq_table[index]; next_freq = entry->frequency; From abe8bd024e7ac21ad21348c0fea09aefb4e469c7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 21 Jul 2016 14:39:26 -0700 Subject: [PATCH 39/41] cpufreq: Disallow ->resolve_freq() for drivers providing ->target_index() The handlers provided by cpufreq core are sufficient for resolving the frequency for drivers providing ->target_index(), as the core already has the frequency table and so ->resolve_freq() isn't required for such platforms. This patch disallows drivers with ->target_index() callback to use the ->resolve_freq() callback. Also, it fixes a potential kernel crash for drivers providing ->target() but no ->resolve_freq(). Fixes: e3c062360870 "cpufreq: add cpufreq_driver_resolve_freq()" Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b696baeb249d84..3ef9be3965ff16 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -507,12 +507,20 @@ unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, { target_freq = clamp_val(target_freq, policy->min, policy->max); policy->cached_target_freq = target_freq; + + if (cpufreq_driver->target_index) { + int idx; + + idx = cpufreq_frequency_table_target(policy, target_freq, + CPUFREQ_RELATION_L); + policy->cached_resolved_idx = idx; + return policy->freq_table[idx].frequency; + } + if (cpufreq_driver->resolve_freq) return cpufreq_driver->resolve_freq(policy, target_freq); - policy->cached_resolved_idx = - cpufreq_frequency_table_target(policy, target_freq, - CPUFREQ_RELATION_L); - return policy->freq_table[policy->cached_resolved_idx].frequency; + + return target_freq; } /********************************************************************* From ae2c1ca686d4b80258a442fa7f3edc71fd0ebb39 Mon Sep 17 00:00:00 2001 From: Steve Muckle Date: Thu, 21 Jul 2016 19:24:38 -0700 Subject: [PATCH 40/41] cpufreq: export cpufreq_driver_resolve_freq() Export cpufreq_driver_resolve_freq() since governors may be compiled as modules. Reported-by: Stephen Rothwell Signed-off-by: Steve Muckle Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3ef9be3965ff16..3dd4884c6f9ef5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -522,6 +522,7 @@ unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, return target_freq; } +EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); /********************************************************************* * SYSFS INTERFACE * From da7d3abe1c9e5ebac2cf86f97e9e89888a5e2094 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 22 Jul 2016 17:14:11 +0200 Subject: [PATCH 41/41] Revert "cpufreq: pcc-cpufreq: update default value of cpuinfo_transition_latency" This reverts commit 790d849bf811a8ab5d4cd2cce0f6fda92f6aebf2. Using a v4.7-rc7 kernel on a HP ProLiant triggered following messages pcc-cpufreq: (v1.10.00) driver loaded with frequency limits: 1200 MHz, 2800 MHz cpufreq: ondemand governor failed, too long transition latency of HW, fallback to performance governor The last line was shown for each CPU in the system. Testing v4.5 (where commit 790d849b was integrated) triggered similar messages. Same behaviour on a 2nd HP Proliant system. So commit 790d849bf (cpufreq: pcc-cpufreq: update default value of cpuinfo_transition_latency) causes the system to use performance governor which, I guess, was not the intention of the patch. Enabling debug output in pcc-cpufreq provides following verbose output: pcc-cpufreq: (v1.10.00) driver loaded with frequency limits: 1200 MHz, 2800 MHz pcc_get_offset: for CPU 0: pcc_cpu_data input_offset: 0x44, pcc_cpu_data output_offset: 0x48 init: policy->max is 2800000, policy->min is 1200000 get: get_freq for CPU 0 get: SUCCESS: (virtual) output_offset for cpu 0 is 0xffffc9000d7c0048, contains a value of: 0xff06. Speed is: 168000 MHz cpufreq: ondemand governor failed, too long transition latency of HW, fallback to performance governor target: CPU 0 should go to target freq: 2800000 (virtual) input_offset is 0xffffc9000d7c0044 target: was SUCCESSFUL for cpu 0 I am asking to revert 790d849bf to re-enable usage of ondemand governor with pcc-cpufreq. Fixes: 790d849bf (cpufreq: pcc-cpufreq: update default value of cpuinfo_transition_latency) CC: # 4.5+ Signed-off-by: Andreas Herrmann Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/pcc-cpufreq.txt | 4 ++-- drivers/cpufreq/pcc-cpufreq.c | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt index 0a94224ad2965b..9e3c3b33514c68 100644 --- a/Documentation/cpu-freq/pcc-cpufreq.txt +++ b/Documentation/cpu-freq/pcc-cpufreq.txt @@ -159,8 +159,8 @@ to be strictly associated with a P-state. 2.2 cpuinfo_transition_latency: ------------------------------- -The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification -does not include a field to expose this value currently. +The cpuinfo_transition_latency field is 0. The PCC specification does +not include a field to expose this value currently. 2.3 cpuinfo_cur_freq: --------------------- diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index a7ecb9a84c159a..3f0ce2ae35ee43 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -555,8 +555,6 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->min = policy->cpuinfo.min_freq = ioread32(&pcch_hdr->minimum_frequency) * 1000; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - pr_debug("init: policy->max is %d, policy->min is %d\n", policy->max, policy->min); out: