From 72f80ce4ef9b756185aab5f1955d8352b6021fba Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 13 Aug 2020 08:38:37 +0530 Subject: [PATCH 01/65] opp: Rename regulator_enabled and use it as status of all resources Expand the scope of the regulator_enabled flag and use it to track status of all the resources. This will be used for other stuff in the next patch. Tested-by: Rajendra Nayak Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 19 +++++++++---------- drivers/opp/opp.h | 4 ++-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 9668ea04cc808c..6f43ef4945b759 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -703,12 +703,10 @@ static int _generic_set_opp_regulator(struct opp_table *opp_table, * Enable the regulator after setting its voltages, otherwise it breaks * some boot-enabled regulators. */ - if (unlikely(!opp_table->regulator_enabled)) { + if (unlikely(!opp_table->enabled)) { ret = regulator_enable(reg); if (ret < 0) dev_warn(dev, "Failed to enable regulator: %d", ret); - else - opp_table->regulator_enabled = true; } return 0; @@ -909,12 +907,12 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) if (ret) goto put_opp_table; - if (opp_table->regulator_enabled) { + if (opp_table->regulators) regulator_disable(opp_table->regulators[0]); - opp_table->regulator_enabled = false; - } ret = _set_required_opps(dev, opp_table, NULL); + + opp_table->enabled = false; goto put_opp_table; } @@ -1001,8 +999,11 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) dev_err(dev, "Failed to set required opps: %d\n", ret); } - if (!ret) + if (!ret) { ret = _set_opp_bw(opp_table, opp, dev, false); + if (!ret) + opp_table->enabled = true; + } put_opp: dev_pm_opp_put(opp); @@ -1796,11 +1797,9 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table) /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); - if (opp_table->regulator_enabled) { + if (opp_table->enabled) { for (i = opp_table->regulator_count - 1; i >= 0; i--) regulator_disable(opp_table->regulators[i]); - - opp_table->regulator_enabled = false; } for (i = opp_table->regulator_count - 1; i >= 0; i--) diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index e51646ff279eb8..0c3de3f6db5c4c 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -147,11 +147,11 @@ enum opp_table_access { * @clk: Device's clock handle * @regulators: Supply regulators * @regulator_count: Number of power supply regulators. Its value can be -1 - * @regulator_enabled: Set to true if regulators were previously enabled. * (uninitialized), 0 (no opp-microvolt property) or > 0 (has opp-microvolt * property). * @paths: Interconnect path handles * @path_count: Number of interconnect paths + * @enabled: Set to true if the device's resources are enabled/configured. * @genpd_performance_state: Device's power domain support performance state. * @is_genpd: Marks if the OPP table belongs to a genpd. * @set_opp: Platform specific set_opp callback @@ -195,9 +195,9 @@ struct opp_table { struct clk *clk; struct regulator **regulators; int regulator_count; - bool regulator_enabled; struct icc_path **paths; unsigned int path_count; + bool enabled; bool genpd_performance_state; bool is_genpd; From 10b217365b9454bc5f31d2801148fdf04939f431 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 13 Aug 2020 08:38:37 +0530 Subject: [PATCH 02/65] opp: Reuse the enabled flag in !target_freq path The OPP core needs to track if the resources of devices are enabled/configured or not, as it disables the resources when target_freq is set to 0. Handle that with the new enabled flag and remove otherwise complex conditional statements. Tested-by: Rajendra Nayak Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 6f43ef4945b759..0b437d483b75df 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -886,22 +886,18 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) } if (unlikely(!target_freq)) { + ret = 0; + + if (!opp_table->enabled) + goto put_opp_table; + /* * Some drivers need to support cases where some platforms may * have OPP table for the device, while others don't and * opp_set_rate() just needs to behave like clk_set_rate(). */ - if (!_get_opp_count(opp_table)) { - ret = 0; - goto put_opp_table; - } - - if (!opp_table->required_opp_tables && !opp_table->regulators && - !opp_table->paths) { - dev_err(dev, "target frequency can't be 0\n"); - ret = -EINVAL; + if (!_get_opp_count(opp_table)) goto put_opp_table; - } ret = _set_opp_bw(opp_table, NULL, dev, true); if (ret) @@ -931,14 +927,11 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) old_freq = clk_get_rate(clk); /* Return early if nothing to do */ - if (old_freq == freq) { - if (!opp_table->required_opp_tables && !opp_table->regulators && - !opp_table->paths) { - dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n", - __func__, freq); - ret = 0; - goto put_opp_table; - } + if (opp_table->enabled && old_freq == freq) { + dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n", + __func__, freq); + ret = 0; + goto put_opp_table; } /* From f3364e17d5716a7356f16fe73c36cba50633ee18 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 13 Aug 2020 09:48:04 +0530 Subject: [PATCH 03/65] opp: Split out _opp_set_rate_zero() Create separate routine _opp_set_rate_zero() to handle !target_freq case. Tested-by: Rajendra Nayak Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 52 ++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 0b437d483b75df..4edd2c3d6d9155 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -860,6 +860,34 @@ int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp) } EXPORT_SYMBOL_GPL(dev_pm_opp_set_bw); +static int _opp_set_rate_zero(struct device *dev, struct opp_table *opp_table) +{ + int ret; + + if (!opp_table->enabled) + return 0; + + /* + * Some drivers need to support cases where some platforms may + * have OPP table for the device, while others don't and + * opp_set_rate() just needs to behave like clk_set_rate(). + */ + if (!_get_opp_count(opp_table)) + return 0; + + ret = _set_opp_bw(opp_table, NULL, dev, true); + if (ret) + return ret; + + if (opp_table->regulators) + regulator_disable(opp_table->regulators[0]); + + ret = _set_required_opps(dev, opp_table, NULL); + + opp_table->enabled = false; + return ret; +} + /** * dev_pm_opp_set_rate() - Configure new OPP based on frequency * @dev: device for which we do this operation @@ -886,29 +914,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) } if (unlikely(!target_freq)) { - ret = 0; - - if (!opp_table->enabled) - goto put_opp_table; - - /* - * Some drivers need to support cases where some platforms may - * have OPP table for the device, while others don't and - * opp_set_rate() just needs to behave like clk_set_rate(). - */ - if (!_get_opp_count(opp_table)) - goto put_opp_table; - - ret = _set_opp_bw(opp_table, NULL, dev, true); - if (ret) - goto put_opp_table; - - if (opp_table->regulators) - regulator_disable(opp_table->regulators[0]); - - ret = _set_required_opps(dev, opp_table, NULL); - - opp_table->enabled = false; + ret = _opp_set_rate_zero(dev, opp_table); goto put_opp_table; } From 8aaf6264fc7fde15864c3e4d84ccc8b3af6811f3 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 20 Aug 2020 13:18:23 +0530 Subject: [PATCH 04/65] opp: Remove _dev_pm_opp_find_and_remove_table() wrapper Remove the unnecessary wrapper and merge _dev_pm_opp_find_and_remove_table() with dev_pm_opp_remove_table(). Tested-by: Rajendra Nayak Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 21 ++++++++------------- drivers/opp/cpu.c | 2 +- drivers/opp/of.c | 2 +- drivers/opp/opp.h | 1 - 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 4edd2c3d6d9155..6978b9218c6e6f 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -2395,7 +2395,14 @@ int dev_pm_opp_unregister_notifier(struct device *dev, } EXPORT_SYMBOL(dev_pm_opp_unregister_notifier); -void _dev_pm_opp_find_and_remove_table(struct device *dev) +/** + * dev_pm_opp_remove_table() - Free all OPPs associated with the device + * @dev: device pointer used to lookup OPP table. + * + * Free both OPPs created using static entries present in DT and the + * dynamically added entries. + */ +void dev_pm_opp_remove_table(struct device *dev) { struct opp_table *opp_table; @@ -2420,16 +2427,4 @@ void _dev_pm_opp_find_and_remove_table(struct device *dev) /* Drop reference taken while the OPP table was added */ dev_pm_opp_put_opp_table(opp_table); } - -/** - * dev_pm_opp_remove_table() - Free all OPPs associated with the device - * @dev: device pointer used to lookup OPP table. - * - * Free both OPPs created using static entries present in DT and the - * dynamically added entries. - */ -void dev_pm_opp_remove_table(struct device *dev) -{ - _dev_pm_opp_find_and_remove_table(dev); -} EXPORT_SYMBOL_GPL(dev_pm_opp_remove_table); diff --git a/drivers/opp/cpu.c b/drivers/opp/cpu.c index b5055cc886ef58..5004335cf0de64 100644 --- a/drivers/opp/cpu.c +++ b/drivers/opp/cpu.c @@ -124,7 +124,7 @@ void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, continue; } - _dev_pm_opp_find_and_remove_table(cpu_dev); + dev_pm_opp_remove_table(cpu_dev); } } diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 0430290670ab39..7d9d4455a59e9a 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -616,7 +616,7 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, */ void dev_pm_opp_of_remove_table(struct device *dev) { - _dev_pm_opp_find_and_remove_table(dev); + dev_pm_opp_remove_table(dev); } EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table); diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 0c3de3f6db5c4c..78e876ec803e19 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -217,7 +217,6 @@ void _get_opp_table_kref(struct opp_table *opp_table); int _get_opp_count(struct opp_table *opp_table); struct opp_table *_find_opp_table(struct device *dev); struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table); -void _dev_pm_opp_find_and_remove_table(struct device *dev); struct dev_pm_opp *_opp_allocate(struct opp_table *opp_table); void _opp_free(struct dev_pm_opp *opp); int _opp_compare_key(struct dev_pm_opp *opp1, struct dev_pm_opp *opp2); From dd461cd9183fe80bee84fd6cab2a29bfc8f55a76 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 27 Jul 2020 11:30:46 +0200 Subject: [PATCH 05/65] opp: Allow dev_pm_opp_get_opp_table() to return -EPROBE_DEFER The OPP core manages various resources, e.g. clocks or interconnect paths. These resources are looked up when the OPP table is allocated once dev_pm_opp_get_opp_table() is called the first time (either directly or indirectly through one of the many helper functions). At this point, the resources may not be available yet, i.e. looking them up will result in -EPROBE_DEFER. Unfortunately, dev_pm_opp_get_opp_table() is currently unable to propagate this error code since it only returns the allocated OPP table or NULL. This means that all consumers of the OPP core are required to make sure that all necessary resources are available. Usually this happens by requesting them, checking the result and releasing them immediately after. For example, we have added "dev_pm_opp_of_find_icc_paths(dev, NULL)" to several drivers now just to make sure the interconnect providers are ready before the OPP table is allocated. If this call is missing, the OPP core will only warn about this and then attempt to continue without interconnect. This will eventually fail horribly, e.g.: cpu cpu0: _allocate_opp_table: Error finding interconnect paths: -517 ... later ... of: _read_bw: Mismatch between opp-peak-kBps and paths (1 0) cpu cpu0: _opp_add_static_v2: opp key field not found cpu cpu0: _of_add_opp_table_v2: Failed to add OPP, -22 This example happens when trying to use interconnects for a CPU OPP table together with qcom-cpufreq-nvmem.c. qcom-cpufreq-nvmem calls dev_pm_opp_set_supported_hw(), which ends up allocating the OPP table early. To fix the problem with the current approach we would need to add yet another call to dev_pm_opp_of_find_icc_paths(dev, NULL). But actually qcom-cpufreq-nvmem.c has nothing to do with interconnects... This commit attempts to make this more robust by allowing dev_pm_opp_get_opp_table() to return an error pointer. Fixing all the usages is trivial because the function is usually used indirectly through another helper (e.g. dev_pm_opp_set_supported_hw() above). These other helpers already return an error pointer. The example above then works correctly because set_supported_hw() will return -EPROBE_DEFER, and qcom-cpufreq-nvmem.c already propagates that error. It should also be possible to remove the remaining usages of "dev_pm_opp_of_find_icc_paths(dev, NULL)" from other drivers as well. Note that this commit currently only handles -EPROBE_DEFER for the clock/interconnects within _allocate_opp_table(). Other errors are just ignored as before. Eventually those should be propagated as well. Signed-off-by: Stephan Gerhold Acked-by: Krzysztof Kozlowski Reviewed-by: Ulf Hansson [ Viresh: skip checking return value of dev_pm_opp_get_opp_table() for EPROBE_DEFER in domain.c, fix NULL return value and reorder code a bit in core.c, and update exynos-asv.c ] Signed-off-by: Viresh Kumar --- drivers/base/power/domain.c | 14 +++++---- drivers/opp/core.c | 53 +++++++++++++++++++------------- drivers/opp/of.c | 8 ++--- drivers/soc/samsung/exynos-asv.c | 2 +- 4 files changed, 44 insertions(+), 33 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 2cb5e04cf86cd3..b92bb61550d37c 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2044,8 +2044,9 @@ int of_genpd_add_provider_simple(struct device_node *np, if (genpd->set_performance_state) { ret = dev_pm_opp_of_add_table(&genpd->dev); if (ret) { - dev_err(&genpd->dev, "Failed to add OPP table: %d\n", - ret); + if (ret != -EPROBE_DEFER) + dev_err(&genpd->dev, "Failed to add OPP table: %d\n", + ret); goto unlock; } @@ -2054,7 +2055,7 @@ int of_genpd_add_provider_simple(struct device_node *np, * state. */ genpd->opp_table = dev_pm_opp_get_opp_table(&genpd->dev); - WARN_ON(!genpd->opp_table); + WARN_ON(IS_ERR(genpd->opp_table)); } ret = genpd_add_provider(np, genpd_xlate_simple, genpd); @@ -2111,8 +2112,9 @@ int of_genpd_add_provider_onecell(struct device_node *np, if (genpd->set_performance_state) { ret = dev_pm_opp_of_add_table_indexed(&genpd->dev, i); if (ret) { - dev_err(&genpd->dev, "Failed to add OPP table for index %d: %d\n", - i, ret); + if (ret != -EPROBE_DEFER) + dev_err(&genpd->dev, "Failed to add OPP table for index %d: %d\n", + i, ret); goto error; } @@ -2121,7 +2123,7 @@ int of_genpd_add_provider_onecell(struct device_node *np, * performance state. */ genpd->opp_table = dev_pm_opp_get_opp_table_indexed(&genpd->dev, i); - WARN_ON(!genpd->opp_table); + WARN_ON(IS_ERR(genpd->opp_table)); } genpd->provider = &np->fwnode; diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 9d7fb45b1786d2..9f16769894834d 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1063,7 +1063,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) */ opp_table = kzalloc(sizeof(*opp_table), GFP_KERNEL); if (!opp_table) - return NULL; + return ERR_PTR(-ENOMEM); mutex_init(&opp_table->lock); mutex_init(&opp_table->genpd_virt_dev_lock); @@ -1074,8 +1074,8 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) opp_dev = _add_opp_dev(dev, opp_table); if (!opp_dev) { - kfree(opp_table); - return NULL; + ret = -ENOMEM; + goto err; } _of_init_opp_table(opp_table, dev, index); @@ -1084,16 +1084,21 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) opp_table->clk = clk_get(dev, NULL); if (IS_ERR(opp_table->clk)) { ret = PTR_ERR(opp_table->clk); - if (ret != -EPROBE_DEFER) - dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, - ret); + if (ret == -EPROBE_DEFER) + goto err; + + dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, ret); } /* Find interconnect path(s) for the device */ ret = dev_pm_opp_of_find_icc_paths(dev, opp_table); - if (ret) + if (ret) { + if (ret == -EPROBE_DEFER) + goto err; + dev_warn(dev, "%s: Error finding interconnect paths: %d\n", __func__, ret); + } BLOCKING_INIT_NOTIFIER_HEAD(&opp_table->head); INIT_LIST_HEAD(&opp_table->opp_list); @@ -1102,6 +1107,10 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) /* Secure the device table modification */ list_add(&opp_table->node, &opp_tables); return opp_table; + +err: + kfree(opp_table); + return ERR_PTR(ret); } void _get_opp_table_kref(struct opp_table *opp_table) @@ -1124,7 +1133,7 @@ static struct opp_table *_opp_get_opp_table(struct device *dev, int index) if (opp_table) { if (!_add_opp_dev_unlocked(dev, opp_table)) { dev_pm_opp_put_opp_table(opp_table); - opp_table = NULL; + opp_table = ERR_PTR(-ENOMEM); } goto unlock; } @@ -1568,8 +1577,8 @@ struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev, struct opp_table *opp_table; opp_table = dev_pm_opp_get_opp_table(dev); - if (!opp_table) - return ERR_PTR(-ENOMEM); + if (IS_ERR(opp_table)) + return opp_table; /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); @@ -1627,8 +1636,8 @@ struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) struct opp_table *opp_table; opp_table = dev_pm_opp_get_opp_table(dev); - if (!opp_table) - return ERR_PTR(-ENOMEM); + if (IS_ERR(opp_table)) + return opp_table; /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); @@ -1720,8 +1729,8 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev, int ret, i; opp_table = dev_pm_opp_get_opp_table(dev); - if (!opp_table) - return ERR_PTR(-ENOMEM); + if (IS_ERR(opp_table)) + return opp_table; /* This should be called before OPPs are initialized */ if (WARN_ON(!list_empty(&opp_table->opp_list))) { @@ -1830,8 +1839,8 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name) int ret; opp_table = dev_pm_opp_get_opp_table(dev); - if (!opp_table) - return ERR_PTR(-ENOMEM); + if (IS_ERR(opp_table)) + return opp_table; /* This should be called before OPPs are initialized */ if (WARN_ON(!list_empty(&opp_table->opp_list))) { @@ -1898,8 +1907,8 @@ struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, return ERR_PTR(-EINVAL); opp_table = dev_pm_opp_get_opp_table(dev); - if (!opp_table) - return ERR_PTR(-ENOMEM); + if (!IS_ERR(opp_table)) + return opp_table; /* This should be called before OPPs are initialized */ if (WARN_ON(!list_empty(&opp_table->opp_list))) { @@ -1979,8 +1988,8 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **name = names; opp_table = dev_pm_opp_get_opp_table(dev); - if (!opp_table) - return ERR_PTR(-ENOMEM); + if (IS_ERR(opp_table)) + return opp_table; /* * If the genpd's OPP table isn't already initialized, parsing of the @@ -2150,8 +2159,8 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) int ret; opp_table = dev_pm_opp_get_opp_table(dev); - if (!opp_table) - return -ENOMEM; + if (IS_ERR(opp_table)) + return PTR_ERR(opp_table); /* Fix regulator count for dynamic OPPs */ opp_table->regulator_count = 1; diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 0430290670ab39..d8b623cc015ae9 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -947,8 +947,8 @@ int dev_pm_opp_of_add_table(struct device *dev) int ret; opp_table = dev_pm_opp_get_opp_table_indexed(dev, 0); - if (!opp_table) - return -ENOMEM; + if (IS_ERR(opp_table)) + return PTR_ERR(opp_table); /* * OPPs have two version of bindings now. Also try the old (v1) @@ -1002,8 +1002,8 @@ int dev_pm_opp_of_add_table_indexed(struct device *dev, int index) } opp_table = dev_pm_opp_get_opp_table_indexed(dev, index); - if (!opp_table) - return -ENOMEM; + if (IS_ERR(opp_table)) + return PTR_ERR(opp_table); ret = _of_add_opp_table_v2(dev, opp_table); if (ret) diff --git a/drivers/soc/samsung/exynos-asv.c b/drivers/soc/samsung/exynos-asv.c index 30bb7b7cc7698a..8abf4dfaa5c59d 100644 --- a/drivers/soc/samsung/exynos-asv.c +++ b/drivers/soc/samsung/exynos-asv.c @@ -93,7 +93,7 @@ static int exynos_asv_update_opps(struct exynos_asv *asv) continue; opp_table = dev_pm_opp_get_opp_table(cpu); - if (IS_ERR_OR_NULL(opp_table)) + if (IS_ERR(opp_table)) continue; if (!last_opp_table || opp_table != last_opp_table) { From ded10c47f39e0f850d53f5a2b418fbafdef53347 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 20 Aug 2020 15:46:20 +0530 Subject: [PATCH 06/65] cpufreq: imx6q: Unconditionally call dev_pm_opp_of_remove_table() dev_pm_opp_of_remove_table() doesn't report any errors when it fails to find the OPP table with error -ENODEV (i.e. OPP table not present for the device). And we can call dev_pm_opp_of_remove_table() unconditionally here. Signed-off-by: Viresh Kumar --- drivers/cpufreq/imx6q-cpufreq.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index ef7b34c1fd2ba1..5bf5fc759881fa 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -48,7 +48,6 @@ static struct clk_bulk_data clks[] = { }; static struct device *cpu_dev; -static bool free_opp; static struct cpufreq_frequency_table *freq_table; static unsigned int max_freq; static unsigned int transition_latency; @@ -390,9 +389,6 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) goto put_reg; } - /* Because we have added the OPPs here, we must free them */ - free_opp = true; - if (of_machine_is_compatible("fsl,imx6ul") || of_machine_is_compatible("fsl,imx6ull")) { ret = imx6ul_opp_check_speed_grading(cpu_dev); @@ -507,8 +503,7 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) free_freq_table: dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); out_free_opp: - if (free_opp) - dev_pm_opp_of_remove_table(cpu_dev); + dev_pm_opp_of_remove_table(cpu_dev); put_reg: if (!IS_ERR(arm_reg)) regulator_put(arm_reg); @@ -528,8 +523,7 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&imx6q_cpufreq_driver); dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); - if (free_opp) - dev_pm_opp_of_remove_table(cpu_dev); + dev_pm_opp_of_remove_table(cpu_dev); regulator_put(arm_reg); if (!IS_ERR(pu_reg)) regulator_put(pu_reg); From 90d46d71cce279d878793a0ed4b326b4027aca6c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 1 Sep 2020 15:07:09 +0530 Subject: [PATCH 07/65] opp: Handle multiple calls for same OPP table in _of_add_opp_table_v1() Until now for V1 OPP bindings we used to call dev_pm_opp_of_cpumask_add_table() first and then dev_pm_opp_set_sharing_cpus() in the cpufreq-dt driver. A later patch will though update the cpufreq-dt driver to optimize the code a bit and we will call dev_pm_opp_set_sharing_cpus() first followed by dev_pm_opp_of_cpumask_add_table(), which doesn't work well today as it tries to re parse the OPP entries. This should work nevertheless for V1 bindings as the same works for V2 bindings. Adapt the same approach from V2 bindings and fix this. Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index d8b623cc015ae9..01a9f1ff5bb003 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -886,11 +886,25 @@ static int _of_add_opp_table_v1(struct device *dev, struct opp_table *opp_table) const __be32 *val; int nr, ret = 0; + mutex_lock(&opp_table->lock); + if (opp_table->parsed_static_opps) { + opp_table->parsed_static_opps++; + mutex_unlock(&opp_table->lock); + return 0; + } + + opp_table->parsed_static_opps = 1; + mutex_unlock(&opp_table->lock); + prop = of_find_property(dev->of_node, "operating-points", NULL); - if (!prop) - return -ENODEV; - if (!prop->value) - return -ENODATA; + if (!prop) { + ret = -ENODEV; + goto remove_static_opp; + } + if (!prop->value) { + ret = -ENODATA; + goto remove_static_opp; + } /* * Each OPP is a set of tuples consisting of frequency and @@ -899,13 +913,10 @@ static int _of_add_opp_table_v1(struct device *dev, struct opp_table *opp_table) nr = prop->length / sizeof(u32); if (nr % 2) { dev_err(dev, "%s: Invalid OPP table\n", __func__); - return -EINVAL; + ret = -EINVAL; + goto remove_static_opp; } - mutex_lock(&opp_table->lock); - opp_table->parsed_static_opps = 1; - mutex_unlock(&opp_table->lock); - val = prop->value; while (nr) { unsigned long freq = be32_to_cpup(val++) * 1000; @@ -915,12 +926,14 @@ static int _of_add_opp_table_v1(struct device *dev, struct opp_table *opp_table) if (ret) { dev_err(dev, "%s: Failed to add OPP %ld (%d)\n", __func__, freq, ret); - _opp_remove_all_static(opp_table); - return ret; + goto remove_static_opp; } nr -= 2; } +remove_static_opp: + _opp_remove_all_static(opp_table); + return ret; } From dc279ac6e5b4e06ec9c15b82e30e8bf2576b14f9 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 27 Jul 2020 11:30:47 +0200 Subject: [PATCH 08/65] cpufreq: dt: Refactor initialization to handle probe deferral properly cpufreq-dt is currently unable to handle -EPROBE_DEFER properly because the error code is not propagated for the cpufreq_driver->init() callback. Instead, it attempts to avoid the situation by temporarily requesting all resources within resources_available() and releasing them again immediately after. This has several disadvantages: - Whenever we add something like interconnect handling to the OPP core we need to patch cpufreq-dt to request these resources early. - resources_available() is only run for CPU0, but other clusters may eventually depend on other resources that are not available yet. (See FIXME comment removed by this commit...) - All resources need to be looked up several times. Now that the OPP core can propagate -EPROBE_DEFER during initialization, it would be nice to avoid all that trouble and just propagate its error code when necessary. This commit refactors the cpufreq-dt driver to initialize private_data before registering the cpufreq driver. We do this by iterating over all possible CPUs and ensure that all resources are initialized: 1. dev_pm_opp_get_opp_table() ensures the OPP table is allocated and initialized with clock and interconnects. 2. dev_pm_opp_set_regulators() requests the regulators and assigns them to the OPP table. 3. We call dev_pm_opp_of_get_sharing_cpus() early so that we only initialize the OPP table once for each shared policy. With these changes, we actually end up saving a few lines of code, the resources are no longer looked up multiple times and everything should be much more robust. Signed-off-by: Stephan Gerhold [ Viresh: Use list_head structure for maintaining the list and minor changes ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt.c | 286 +++++++++++++++++------------------ 1 file changed, 143 insertions(+), 143 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 944d7b45afe9a9..f0c2d3876dec8c 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -24,18 +25,35 @@ #include "cpufreq-dt.h" struct private_data { - struct opp_table *opp_table; + struct list_head node; + + cpumask_var_t cpus; struct device *cpu_dev; - const char *reg_name; + struct opp_table *opp_table; + struct opp_table *reg_opp_table; bool have_static_opps; }; +static LIST_HEAD(priv_list); + static struct freq_attr *cpufreq_dt_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, /* Extra space for boost-attr if required */ NULL, }; +static struct private_data *cpufreq_dt_find_data(int cpu) +{ + struct private_data *priv; + + list_for_each_entry(priv, &priv_list, node) { + if (cpumask_test_cpu(cpu, priv->cpus)) + return priv; + } + + return NULL; +} + static int set_target(struct cpufreq_policy *policy, unsigned int index) { struct private_data *priv = policy->driver_data; @@ -90,83 +108,24 @@ static const char *find_supply_name(struct device *dev) return name; } -static int resources_available(void) -{ - struct device *cpu_dev; - struct regulator *cpu_reg; - struct clk *cpu_clk; - int ret = 0; - const char *name; - - cpu_dev = get_cpu_device(0); - if (!cpu_dev) { - pr_err("failed to get cpu0 device\n"); - return -ENODEV; - } - - cpu_clk = clk_get(cpu_dev, NULL); - ret = PTR_ERR_OR_ZERO(cpu_clk); - if (ret) { - /* - * If cpu's clk node is present, but clock is not yet - * registered, we should try defering probe. - */ - if (ret == -EPROBE_DEFER) - dev_dbg(cpu_dev, "clock not ready, retry\n"); - else - dev_err(cpu_dev, "failed to get clock: %d\n", ret); - - return ret; - } - - clk_put(cpu_clk); - - ret = dev_pm_opp_of_find_icc_paths(cpu_dev, NULL); - if (ret) - return ret; - - name = find_supply_name(cpu_dev); - /* Platform doesn't require regulator */ - if (!name) - return 0; - - cpu_reg = regulator_get_optional(cpu_dev, name); - ret = PTR_ERR_OR_ZERO(cpu_reg); - if (ret) { - /* - * If cpu's regulator supply node is present, but regulator is - * not yet registered, we should try defering probe. - */ - if (ret == -EPROBE_DEFER) - dev_dbg(cpu_dev, "cpu0 regulator not ready, retry\n"); - else - dev_dbg(cpu_dev, "no regulator for cpu0: %d\n", ret); - - return ret; - } - - regulator_put(cpu_reg); - return 0; -} - static int cpufreq_init(struct cpufreq_policy *policy) { struct cpufreq_frequency_table *freq_table; - struct opp_table *opp_table = NULL; struct private_data *priv; struct device *cpu_dev; struct clk *cpu_clk; unsigned int transition_latency; - bool fallback = false; - const char *name; int ret; - cpu_dev = get_cpu_device(policy->cpu); - if (!cpu_dev) { - pr_err("failed to get cpu%d device\n", policy->cpu); + priv = cpufreq_dt_find_data(policy->cpu); + if (!priv) { + pr_err("failed to find data for cpu%d\n", policy->cpu); return -ENODEV; } + cpu_dev = priv->cpu_dev; + cpumask_copy(policy->cpus, priv->cpus); + cpu_clk = clk_get(cpu_dev, NULL); if (IS_ERR(cpu_clk)) { ret = PTR_ERR(cpu_clk); @@ -174,45 +133,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) return ret; } - /* Get OPP-sharing information from "operating-points-v2" bindings */ - ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, policy->cpus); - if (ret) { - if (ret != -ENOENT) - goto out_put_clk; - - /* - * operating-points-v2 not supported, fallback to old method of - * finding shared-OPPs for backward compatibility if the - * platform hasn't set sharing CPUs. - */ - if (dev_pm_opp_get_sharing_cpus(cpu_dev, policy->cpus)) - fallback = true; - } - - /* - * OPP layer will be taking care of regulators now, but it needs to know - * the name of the regulator first. - */ - name = find_supply_name(cpu_dev); - if (name) { - opp_table = dev_pm_opp_set_regulators(cpu_dev, &name, 1); - if (IS_ERR(opp_table)) { - ret = PTR_ERR(opp_table); - dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n", - policy->cpu, ret); - goto out_put_clk; - } - } - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) { - ret = -ENOMEM; - goto out_put_regulator; - } - - priv->reg_name = name; - priv->opp_table = opp_table; - /* * Initialize OPP tables for all policy->cpus. They will be shared by * all CPUs which have marked their CPUs shared with OPP bindings. @@ -232,31 +152,17 @@ static int cpufreq_init(struct cpufreq_policy *policy) */ ret = dev_pm_opp_get_opp_count(cpu_dev); if (ret <= 0) { - dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n"); - ret = -EPROBE_DEFER; + dev_err(cpu_dev, "OPP table can't be empty\n"); + ret = -ENODEV; goto out_free_opp; } - if (fallback) { - cpumask_setall(policy->cpus); - - /* - * OPP tables are initialized only for policy->cpu, do it for - * others as well. - */ - ret = dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus); - if (ret) - dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", - __func__, ret); - } - ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); goto out_free_opp; } - priv->cpu_dev = cpu_dev; policy->driver_data = priv; policy->clk = cpu_clk; policy->freq_table = freq_table; @@ -288,11 +194,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) out_free_opp: if (priv->have_static_opps) dev_pm_opp_of_cpumask_remove_table(policy->cpus); - kfree(priv); -out_put_regulator: - if (name) - dev_pm_opp_put_regulators(opp_table); -out_put_clk: clk_put(cpu_clk); return ret; @@ -320,12 +221,7 @@ static int cpufreq_exit(struct cpufreq_policy *policy) dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); if (priv->have_static_opps) dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); - if (priv->reg_name) - dev_pm_opp_put_regulators(priv->opp_table); - clk_put(policy->clk); - kfree(priv); - return 0; } @@ -344,21 +240,119 @@ static struct cpufreq_driver dt_cpufreq_driver = { .suspend = cpufreq_generic_suspend, }; -static int dt_cpufreq_probe(struct platform_device *pdev) +static int dt_cpufreq_early_init(struct device *dev, int cpu) { - struct cpufreq_dt_platform_data *data = dev_get_platdata(&pdev->dev); + struct private_data *priv; + struct device *cpu_dev; + const char *reg_name; int ret; + /* Check if this CPU is already covered by some other policy */ + if (cpufreq_dt_find_data(cpu)) + return 0; + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return -EPROBE_DEFER; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + if (!alloc_cpumask_var(&priv->cpus, GFP_KERNEL)) + return -ENOMEM; + + priv->cpu_dev = cpu_dev; + + /* Try to get OPP table early to ensure resources are available */ + priv->opp_table = dev_pm_opp_get_opp_table(cpu_dev); + if (IS_ERR(priv->opp_table)) { + ret = PTR_ERR(priv->opp_table); + if (ret != -EPROBE_DEFER) + dev_err(cpu_dev, "failed to get OPP table: %d\n", ret); + goto free_cpumask; + } + /* - * All per-cluster (CPUs sharing clock/voltages) initialization is done - * from ->init(). In probe(), we just need to make sure that clk and - * regulators are available. Else defer probe and retry. - * - * FIXME: Is checking this only for CPU0 sufficient ? + * OPP layer will be taking care of regulators now, but it needs to know + * the name of the regulator first. */ - ret = resources_available(); - if (ret) - return ret; + reg_name = find_supply_name(cpu_dev); + if (reg_name) { + priv->reg_opp_table = dev_pm_opp_set_regulators(cpu_dev, + ®_name, 1); + if (IS_ERR(priv->reg_opp_table)) { + ret = PTR_ERR(priv->reg_opp_table); + if (ret != -EPROBE_DEFER) + dev_err(cpu_dev, "failed to set regulators: %d\n", + ret); + goto put_table; + } + } + + /* Find OPP sharing information so we can fill pri->cpus here */ + /* Get OPP-sharing information from "operating-points-v2" bindings */ + ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, priv->cpus); + if (ret) { + if (ret != -ENOENT) + goto put_reg; + + /* + * operating-points-v2 not supported, fallback to all CPUs share + * OPP for backward compatibility if the platform hasn't set + * sharing CPUs. + */ + if (dev_pm_opp_get_sharing_cpus(cpu_dev, priv->cpus)) { + cpumask_setall(priv->cpus); + + /* + * OPP tables are initialized only for cpu, do it for + * others as well. + */ + ret = dev_pm_opp_set_sharing_cpus(cpu_dev, priv->cpus); + if (ret) + dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", + __func__, ret); + } + } + + list_add(&priv->node, &priv_list); + return 0; + +put_reg: + if (priv->reg_opp_table) + dev_pm_opp_put_regulators(priv->reg_opp_table); +put_table: + dev_pm_opp_put_opp_table(priv->opp_table); +free_cpumask: + free_cpumask_var(priv->cpus); + return ret; +} + +static void dt_cpufreq_release(void) +{ + struct private_data *priv, *tmp; + + list_for_each_entry_safe(priv, tmp, &priv_list, node) { + if (priv->reg_opp_table) + dev_pm_opp_put_regulators(priv->reg_opp_table); + dev_pm_opp_put_opp_table(priv->opp_table); + free_cpumask_var(priv->cpus); + list_del(&priv->node); + } +} + +static int dt_cpufreq_probe(struct platform_device *pdev) +{ + struct cpufreq_dt_platform_data *data = dev_get_platdata(&pdev->dev); + int ret, cpu; + + /* Request resources early so we can return in case of -EPROBE_DEFER */ + for_each_possible_cpu(cpu) { + ret = dt_cpufreq_early_init(&pdev->dev, cpu); + if (ret) + goto err; + } if (data) { if (data->have_governor_per_policy) @@ -374,15 +368,21 @@ static int dt_cpufreq_probe(struct platform_device *pdev) } ret = cpufreq_register_driver(&dt_cpufreq_driver); - if (ret) + if (ret) { dev_err(&pdev->dev, "failed register driver: %d\n", ret); + goto err; + } + return 0; +err: + dt_cpufreq_release(); return ret; } static int dt_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&dt_cpufreq_driver); + dt_cpufreq_release(); return 0; } From 475ac8ead803e8c3a8f492e3129b1a19d7e2efd7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 27 Aug 2020 15:06:43 +0530 Subject: [PATCH 09/65] opp: Drop unnecessary check from dev_pm_opp_attach_genpd() Since commit c0ab9e0812da ("opp: Allocate genpd_virt_devs from dev_pm_opp_attach_genpd()"), the allocation of the virtual devices is moved to dev_pm_opp_attach_genpd() and this check isn't required anymore as it will always fail. Drop it. Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 8c69a764d0a466..fa97a875eb4781 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -2019,12 +2019,6 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, goto err; } - if (opp_table->genpd_virt_devs[index]) { - dev_err(dev, "Genpd virtual device already set %s\n", - *name); - goto err; - } - virt_dev = dev_pm_domain_attach_by_name(dev, *name); if (IS_ERR(virt_dev)) { ret = PTR_ERR(virt_dev); From 60cdeae0d627eca4ae616e3097a1f00ac9f3d704 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Thu, 30 Jul 2020 10:01:44 +0200 Subject: [PATCH 10/65] opp: Reduce code duplication in _set_required_opps() Move call to dev_pm_genpd_set_performance_state() to a separate function so we can avoid duplicating the code for the single and multiple genpd case. Signed-off-by: Stephan Gerhold [ Viresh: Validate virtual device before use ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index fa97a875eb4781..79c873344895b7 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -779,6 +779,24 @@ static int _set_opp_custom(const struct opp_table *opp_table, return opp_table->set_opp(data); } +static int _set_required_opp(struct device *dev, struct device *pd_dev, + struct dev_pm_opp *opp, int i) +{ + unsigned int pstate = likely(opp) ? opp->required_opps[i]->pstate : 0; + int ret; + + if (!pd_dev) + return 0; + + ret = dev_pm_genpd_set_performance_state(pd_dev, pstate); + if (ret) { + dev_err(dev, "Failed to set performance rate of %s: %d (%d)\n", + dev_name(pd_dev), pstate, ret); + } + + return ret; +} + /* This is only called for PM domain for now */ static int _set_required_opps(struct device *dev, struct opp_table *opp_table, @@ -786,22 +804,14 @@ static int _set_required_opps(struct device *dev, { struct opp_table **required_opp_tables = opp_table->required_opp_tables; struct device **genpd_virt_devs = opp_table->genpd_virt_devs; - unsigned int pstate; int i, ret = 0; if (!required_opp_tables) return 0; /* Single genpd case */ - if (!genpd_virt_devs) { - pstate = likely(opp) ? opp->required_opps[0]->pstate : 0; - ret = dev_pm_genpd_set_performance_state(dev, pstate); - if (ret) { - dev_err(dev, "Failed to set performance state of %s: %d (%d)\n", - dev_name(dev), pstate, ret); - } - return ret; - } + if (!genpd_virt_devs) + return _set_required_opp(dev, dev, opp, 0); /* Multiple genpd case */ @@ -810,19 +820,10 @@ static int _set_required_opps(struct device *dev, * after it is freed from another thread. */ mutex_lock(&opp_table->genpd_virt_dev_lock); - for (i = 0; i < opp_table->required_opp_count; i++) { - pstate = likely(opp) ? opp->required_opps[i]->pstate : 0; - - if (!genpd_virt_devs[i]) - continue; - - ret = dev_pm_genpd_set_performance_state(genpd_virt_devs[i], pstate); - if (ret) { - dev_err(dev, "Failed to set performance rate of %s: %d (%d)\n", - dev_name(genpd_virt_devs[i]), pstate, ret); + ret = _set_required_opp(dev, genpd_virt_devs[i], opp, i); + if (ret) break; - } } mutex_unlock(&opp_table->genpd_virt_dev_lock); From 2c59138c22f17c1da027d3c90dbcdd6995c77414 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Thu, 30 Jul 2020 10:01:45 +0200 Subject: [PATCH 11/65] opp: Set required OPPs in reverse order when scaling down The OPP core already has well-defined semantics to ensure required OPPs/regulators are set before/after the frequency change, depending on if we scale up or down. Similar requirements might exist for the order of required OPPs when multiple power domains need to be scaled for a frequency change. For example, on Qualcomm platforms using CPR (Core Power Reduction), we need to scale the VDDMX and CPR power domain. When scaling up, MX should be scaled up before CPR. When scaling down, CPR should be scaled down before MX. In general, if there are multiple "required-opps" in the device tree I would expect that the order is either irrelevant, or there is some dependency between the power domains. In that case, the power domains should be scaled down in reverse order. This commit updates _set_required_opps() to set required OPPs in reverse order when scaling down. Signed-off-by: Stephan Gerhold [ Viresh: Fix rebase conflict and minor rearrangement of the code ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 79c873344895b7..000d0fcb468097 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -800,7 +800,7 @@ static int _set_required_opp(struct device *dev, struct device *pd_dev, /* This is only called for PM domain for now */ static int _set_required_opps(struct device *dev, struct opp_table *opp_table, - struct dev_pm_opp *opp) + struct dev_pm_opp *opp, bool up) { struct opp_table **required_opp_tables = opp_table->required_opp_tables; struct device **genpd_virt_devs = opp_table->genpd_virt_devs; @@ -820,11 +820,22 @@ static int _set_required_opps(struct device *dev, * after it is freed from another thread. */ mutex_lock(&opp_table->genpd_virt_dev_lock); - for (i = 0; i < opp_table->required_opp_count; i++) { - ret = _set_required_opp(dev, genpd_virt_devs[i], opp, i); - if (ret) - break; + + /* Scaling up? Set required OPPs in normal order, else reverse */ + if (up) { + for (i = 0; i < opp_table->required_opp_count; i++) { + ret = _set_required_opp(dev, genpd_virt_devs[i], opp, i); + if (ret) + break; + } + } else { + for (i = opp_table->required_opp_count - 1; i >= 0; i--) { + ret = _set_required_opp(dev, genpd_virt_devs[i], opp, i); + if (ret) + break; + } } + mutex_unlock(&opp_table->genpd_virt_dev_lock); return ret; @@ -883,7 +894,7 @@ static int _opp_set_rate_zero(struct device *dev, struct opp_table *opp_table) if (opp_table->regulators) regulator_disable(opp_table->regulators[0]); - ret = _set_required_opps(dev, opp_table, NULL); + ret = _set_required_opps(dev, opp_table, NULL, false); opp_table->enabled = false; return ret; @@ -974,7 +985,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) /* Scaling up? Configure required OPPs before frequency */ if (freq >= old_freq) { - ret = _set_required_opps(dev, opp_table, opp); + ret = _set_required_opps(dev, opp_table, opp, true); if (ret) goto put_opp; } @@ -994,7 +1005,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) /* Scaling down? Configure required OPPs after frequency */ if (!ret && freq < old_freq) { - ret = _set_required_opps(dev, opp_table, opp); + ret = _set_required_opps(dev, opp_table, opp, false); if (ret) dev_err(dev, "Failed to set required opps: %d\n", ret); } From 4461269572e6fc216b63e2254c83035d1bc21eb7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 26 Aug 2020 15:09:16 +0530 Subject: [PATCH 12/65] dt-bindings: opp: Allow opp-supported-hw to contain multiple versions A single list of versions for a hierarchy of hardware levels is not sufficient in some cases. For example, if the hardware version has two levels, i.e. X.Y and we want an OPP to support only version 2.1 and 1.2, we will set the property as: opp-supported-hw = <0x00000003 0x00000003>; What this also does is enable hardware versions 2.2 and 1.1, which we don't want. Extend the property to accept multiple versions, so we can define the property as: opp-supported-hw = <0x00000002 0x00000001>, <0x00000001 0x00000002>; While at it, also reword the property description. Reported-by: Stephan Gerhold Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/opp/opp.txt | 53 +++++++++++-------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 9d16d417e9be40..9847dfeeffcba0 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -154,25 +154,27 @@ Optional properties: - opp-suspend: Marks the OPP to be used during device suspend. If multiple OPPs in the table have this, the OPP with highest opp-hz will be used. -- opp-supported-hw: This enables us to select only a subset of OPPs from the - larger OPP table, based on what version of the hardware we are running on. We - still can't have multiple nodes with the same opp-hz value in OPP table. - - It's a user defined array containing a hierarchy of hardware version numbers, - supported by the OPP. For example: a platform with hierarchy of three levels - of versions (A, B and C), this field should be like , where X - corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z - corresponds to version hierarchy C. - - Each level of hierarchy is represented by a 32 bit value, and so there can be - only 32 different supported version per hierarchy. i.e. 1 bit per version. A - value of 0xFFFFFFFF will enable the OPP for all versions for that hierarchy - level. And a value of 0x00000000 will disable the OPP completely, and so we - never want that to happen. - - If 32 values aren't sufficient for a version hierarchy, than that version - hierarchy can be contained in multiple 32 bit values. i.e. in the - above example, Z1 & Z2 refer to the version hierarchy Z. +- opp-supported-hw: This property allows a platform to enable only a subset of + the OPPs from the larger set present in the OPP table, based on the current + version of the hardware (already known to the operating system). + + Each block present in the array of blocks in this property, represents a + sub-group of hardware versions supported by the OPP. i.e. , + , etc. The OPP will be enabled if _any_ of these sub-groups match + the hardware's version. + + Each sub-group is a platform defined array representing the hierarchy of + hardware versions supported by the platform. For a platform with three + hierarchical levels of version (X.Y.Z), this field shall look like + + opp-supported-hw = , , . + + Each level (eg. X1) in version hierarchy is represented by a 32 bit value, one + bit per version and so there can be maximum 32 versions per level. Logical AND + (&) operation is performed for each level with the hardware's level version + and a non-zero output for _all_ the levels in a sub-group means the OPP is + supported by hardware. A value of 0xFFFFFFFF for each level in the sub-group + will enable the OPP for all versions for the hardware. - status: Marks the node enabled/disabled. @@ -503,7 +505,6 @@ Example 5: opp-supported-hw */ opp-supported-hw = <0xF 0xFFFFFFFF 0xFFFFFFFF> opp-hz = /bits/ 64 <600000000>; - opp-microvolt = <915000 900000 925000>; ... }; @@ -516,7 +517,17 @@ Example 5: opp-supported-hw */ opp-supported-hw = <0x20 0xff0000ff 0x0000f4f0> opp-hz = /bits/ 64 <800000000>; - opp-microvolt = <915000 900000 925000>; + ... + }; + + opp-900000000 { + /* + * Supports: + * - All cuts and substrate where process version is 0x2. + * - All cuts and process where substrate version is 0x2. + */ + opp-supported-hw = <0xFFFFFFFF 0xFFFFFFFF 0x02>, <0xFFFFFFFF 0x01 0xFFFFFFFF> + opp-hz = /bits/ 64 <900000000>; ... }; }; From 0ff25c99042a56cd1580b381dd747a56286489cd Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 26 Aug 2020 16:32:27 +0530 Subject: [PATCH 13/65] opp: Allow opp-supported-hw to contain multiple versions The bindings allow multiple versions to be passed to "opp-supported-hw" property, either of which can result in enabling of the OPP. Update code to allow that. Tested-by: Stephan Gerhold Tested-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 2f294c57ec06d9..aa829a569825e4 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -434,9 +434,9 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_find_icc_paths); static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table, struct device_node *np) { - unsigned int count = opp_table->supported_hw_count; - u32 version; - int ret; + unsigned int levels = opp_table->supported_hw_count; + int count, versions, ret, i, j; + u32 val; if (!opp_table->supported_hw) { /* @@ -451,21 +451,40 @@ static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table, return true; } - while (count--) { - ret = of_property_read_u32_index(np, "opp-supported-hw", count, - &version); - if (ret) { - dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n", - __func__, count, ret); - return false; + count = of_property_count_u32_elems(np, "opp-supported-hw"); + if (count <= 0 || count % levels) { + dev_err(dev, "%s: Invalid opp-supported-hw property (%d)\n", + __func__, count); + return false; + } + + versions = count / levels; + + /* All levels in at least one of the versions should match */ + for (i = 0; i < versions; i++) { + bool supported = true; + + for (j = 0; j < levels; j++) { + ret = of_property_read_u32_index(np, "opp-supported-hw", + i * levels + j, &val); + if (ret) { + dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n", + __func__, i * levels + j, ret); + return false; + } + + /* Check if the level is supported */ + if (!(val & opp_table->supported_hw[j])) { + supported = false; + break; + } } - /* Both of these are bitwise masks of the versions */ - if (!(version & opp_table->supported_hw[count])) - return false; + if (supported) + return true; } - return true; + return false; } static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, From 7162fc2e4edae990e04ea7b0ad7f0106d97c9b41 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 26 Aug 2020 17:14:08 +0530 Subject: [PATCH 14/65] ARM: tegra: Pass multiple versions in opp-supported-hw property We can now pass multiple versions in "opp-supported-hw" property, lets do that and simplify the tables a bit. Reviewed-by: Dmitry Osipenko Tested-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- .../boot/dts/tegra20-cpu-opp-microvolt.dtsi | 36 - arch/arm/boot/dts/tegra20-cpu-opp.dtsi | 67 +- .../boot/dts/tegra30-cpu-opp-microvolt.dtsi | 512 --------- arch/arm/boot/dts/tegra30-cpu-opp.dtsi | 984 +++--------------- 4 files changed, 147 insertions(+), 1452 deletions(-) diff --git a/arch/arm/boot/dts/tegra20-cpu-opp-microvolt.dtsi b/arch/arm/boot/dts/tegra20-cpu-opp-microvolt.dtsi index dce85d39480d26..6f3e8c5fc5f0dd 100644 --- a/arch/arm/boot/dts/tegra20-cpu-opp-microvolt.dtsi +++ b/arch/arm/boot/dts/tegra20-cpu-opp-microvolt.dtsi @@ -26,14 +26,6 @@ opp-microvolt = <800000 800000 1125000>; }; - opp@456000000,800,2,2 { - opp-microvolt = <800000 800000 1125000>; - }; - - opp@456000000,800,3,2 { - opp-microvolt = <800000 800000 1125000>; - }; - opp@456000000,825 { opp-microvolt = <825000 825000 1125000>; }; @@ -46,10 +38,6 @@ opp-microvolt = <800000 800000 1125000>; }; - opp@608000000,800,3,2 { - opp-microvolt = <800000 800000 1125000>; - }; - opp@608000000,825 { opp-microvolt = <825000 825000 1125000>; }; @@ -78,18 +66,6 @@ opp-microvolt = <875000 875000 1125000>; }; - opp@760000000,875,1,1 { - opp-microvolt = <875000 875000 1125000>; - }; - - opp@760000000,875,0,2 { - opp-microvolt = <875000 875000 1125000>; - }; - - opp@760000000,875,1,2 { - opp-microvolt = <875000 875000 1125000>; - }; - opp@760000000,900 { opp-microvolt = <900000 900000 1125000>; }; @@ -134,14 +110,6 @@ opp-microvolt = <950000 950000 1125000>; }; - opp@912000000,950,0,2 { - opp-microvolt = <950000 950000 1125000>; - }; - - opp@912000000,950,2,2 { - opp-microvolt = <950000 950000 1125000>; - }; - opp@912000000,1000 { opp-microvolt = <1000000 1000000 1125000>; }; @@ -170,10 +138,6 @@ opp-microvolt = <1000000 1000000 1125000>; }; - opp@1000000000,1000,0,2 { - opp-microvolt = <1000000 1000000 1125000>; - }; - opp@1000000000,1025 { opp-microvolt = <1025000 1025000 1125000>; }; diff --git a/arch/arm/boot/dts/tegra20-cpu-opp.dtsi b/arch/arm/boot/dts/tegra20-cpu-opp.dtsi index 9b8fedb57a1b4a..702a635e88e7ae 100644 --- a/arch/arm/boot/dts/tegra20-cpu-opp.dtsi +++ b/arch/arm/boot/dts/tegra20-cpu-opp.dtsi @@ -37,19 +37,8 @@ opp@456000000,800 { clock-latency-ns = <400000>; - opp-supported-hw = <0x03 0x0006>; - opp-hz = /bits/ 64 <456000000>; - }; - - opp@456000000,800,2,2 { - clock-latency-ns = <400000>; - opp-supported-hw = <0x04 0x0004>; - opp-hz = /bits/ 64 <456000000>; - }; - - opp@456000000,800,3,2 { - clock-latency-ns = <400000>; - opp-supported-hw = <0x08 0x0004>; + opp-supported-hw = <0x03 0x0006>, <0x04 0x0004>, + <0x08 0x0004>; opp-hz = /bits/ 64 <456000000>; }; @@ -67,13 +56,7 @@ opp@608000000,800 { clock-latency-ns = <400000>; - opp-supported-hw = <0x04 0x0006>; - opp-hz = /bits/ 64 <608000000>; - }; - - opp@608000000,800,3,2 { - clock-latency-ns = <400000>; - opp-supported-hw = <0x08 0x0004>; + opp-supported-hw = <0x04 0x0006>, <0x08 0x0004>; opp-hz = /bits/ 64 <608000000>; }; @@ -115,25 +98,8 @@ opp@760000000,875 { clock-latency-ns = <400000>; - opp-supported-hw = <0x04 0x0001>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,875,1,1 { - clock-latency-ns = <400000>; - opp-supported-hw = <0x02 0x0002>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,875,0,2 { - clock-latency-ns = <400000>; - opp-supported-hw = <0x01 0x0004>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,875,1,2 { - clock-latency-ns = <400000>; - opp-supported-hw = <0x02 0x0004>; + opp-supported-hw = <0x04 0x0001>, <0x02 0x0002>, + <0x01 0x0004>, <0x02 0x0004>; opp-hz = /bits/ 64 <760000000>; }; @@ -199,19 +165,8 @@ opp@912000000,950 { clock-latency-ns = <400000>; - opp-supported-hw = <0x02 0x0006>; - opp-hz = /bits/ 64 <912000000>; - }; - - opp@912000000,950,0,2 { - clock-latency-ns = <400000>; - opp-supported-hw = <0x01 0x0004>; - opp-hz = /bits/ 64 <912000000>; - }; - - opp@912000000,950,2,2 { - clock-latency-ns = <400000>; - opp-supported-hw = <0x04 0x0004>; + opp-supported-hw = <0x02 0x0006>, <0x01 0x0004>, + <0x04 0x0004>; opp-hz = /bits/ 64 <912000000>; }; @@ -253,13 +208,7 @@ opp@1000000000,1000 { clock-latency-ns = <400000>; - opp-supported-hw = <0x02 0x0006>; - opp-hz = /bits/ 64 <1000000000>; - }; - - opp@1000000000,1000,0,2 { - clock-latency-ns = <400000>; - opp-supported-hw = <0x01 0x0004>; + opp-supported-hw = <0x02 0x0006>, <0x01 0x0004>; opp-hz = /bits/ 64 <1000000000>; }; diff --git a/arch/arm/boot/dts/tegra30-cpu-opp-microvolt.dtsi b/arch/arm/boot/dts/tegra30-cpu-opp-microvolt.dtsi index d682f7437146a9..1be715d2a442bb 100644 --- a/arch/arm/boot/dts/tegra30-cpu-opp-microvolt.dtsi +++ b/arch/arm/boot/dts/tegra30-cpu-opp-microvolt.dtsi @@ -74,22 +74,6 @@ opp-microvolt = <850000 850000 1250000>; }; - opp@475000000,850,0,1 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@475000000,850,0,4 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@475000000,850,0,7 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@475000000,850,0,8 { - opp-microvolt = <850000 850000 1250000>; - }; - opp@608000000,850 { opp-microvolt = <850000 850000 1250000>; }; @@ -106,62 +90,6 @@ opp-microvolt = <850000 850000 1250000>; }; - opp@640000000,850,1,1 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@640000000,850,2,1 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@640000000,850,3,1 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@640000000,850,1,4 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@640000000,850,2,4 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@640000000,850,3,4 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@640000000,850,1,7 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@640000000,850,2,7 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@640000000,850,3,7 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@640000000,850,4,7 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@640000000,850,1,8 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@640000000,850,2,8 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@640000000,850,3,8 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@640000000,850,4,8 { - opp-microvolt = <850000 850000 1250000>; - }; - opp@640000000,900 { opp-microvolt = <900000 900000 1250000>; }; @@ -170,94 +98,10 @@ opp-microvolt = <850000 850000 1250000>; }; - opp@760000000,850,3,1 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@760000000,850,3,2 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@760000000,850,3,3 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@760000000,850,3,4 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@760000000,850,3,7 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@760000000,850,4,7 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@760000000,850,3,8 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@760000000,850,4,8 { - opp-microvolt = <850000 850000 1250000>; - }; - - opp@760000000,850,0,10 { - opp-microvolt = <850000 850000 1250000>; - }; - opp@760000000,900 { opp-microvolt = <900000 900000 1250000>; }; - opp@760000000,900,1,1 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@760000000,900,2,1 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@760000000,900,1,2 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@760000000,900,2,2 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@760000000,900,1,3 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@760000000,900,2,3 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@760000000,900,1,4 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@760000000,900,2,4 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@760000000,900,1,7 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@760000000,900,2,7 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@760000000,900,1,8 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@760000000,900,2,8 { - opp-microvolt = <900000 900000 1250000>; - }; - opp@760000000,912 { opp-microvolt = <912000 912000 1250000>; }; @@ -282,90 +126,10 @@ opp-microvolt = <900000 900000 1250000>; }; - opp@860000000,900,2,1 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@860000000,900,3,1 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@860000000,900,2,2 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@860000000,900,3,2 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@860000000,900,2,3 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@860000000,900,3,3 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@860000000,900,2,4 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@860000000,900,3,4 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@860000000,900,2,7 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@860000000,900,3,7 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@860000000,900,4,7 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@860000000,900,2,8 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@860000000,900,3,8 { - opp-microvolt = <900000 900000 1250000>; - }; - - opp@860000000,900,4,8 { - opp-microvolt = <900000 900000 1250000>; - }; - opp@860000000,975 { opp-microvolt = <975000 975000 1250000>; }; - opp@860000000,975,1,1 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@860000000,975,1,2 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@860000000,975,1,3 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@860000000,975,1,4 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@860000000,975,1,7 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@860000000,975,1,8 { - opp-microvolt = <975000 975000 1250000>; - }; - opp@860000000,1000 { opp-microvolt = <1000000 1000000 1250000>; }; @@ -382,62 +146,6 @@ opp-microvolt = <975000 975000 1250000>; }; - opp@1000000000,975,2,1 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1000000000,975,3,1 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1000000000,975,2,2 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1000000000,975,3,2 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1000000000,975,2,3 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1000000000,975,3,3 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1000000000,975,2,4 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1000000000,975,3,4 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1000000000,975,2,7 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1000000000,975,3,7 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1000000000,975,4,7 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1000000000,975,2,8 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1000000000,975,3,8 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1000000000,975,4,8 { - opp-microvolt = <975000 975000 1250000>; - }; - opp@1000000000,1000 { opp-microvolt = <1000000 1000000 1250000>; }; @@ -454,66 +162,10 @@ opp-microvolt = <975000 975000 1250000>; }; - opp@1100000000,975,3,1 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1100000000,975,3,2 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1100000000,975,3,3 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1100000000,975,3,4 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1100000000,975,3,7 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1100000000,975,4,7 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1100000000,975,3,8 { - opp-microvolt = <975000 975000 1250000>; - }; - - opp@1100000000,975,4,8 { - opp-microvolt = <975000 975000 1250000>; - }; - opp@1100000000,1000 { opp-microvolt = <1000000 1000000 1250000>; }; - opp@1100000000,1000,2,1 { - opp-microvolt = <1000000 1000000 1250000>; - }; - - opp@1100000000,1000,2,2 { - opp-microvolt = <1000000 1000000 1250000>; - }; - - opp@1100000000,1000,2,3 { - opp-microvolt = <1000000 1000000 1250000>; - }; - - opp@1100000000,1000,2,4 { - opp-microvolt = <1000000 1000000 1250000>; - }; - - opp@1100000000,1000,2,7 { - opp-microvolt = <1000000 1000000 1250000>; - }; - - opp@1100000000,1000,2,8 { - opp-microvolt = <1000000 1000000 1250000>; - }; - opp@1100000000,1025 { opp-microvolt = <1025000 1025000 1250000>; }; @@ -534,66 +186,10 @@ opp-microvolt = <1000000 1000000 1250000>; }; - opp@1200000000,1000,3,1 { - opp-microvolt = <1000000 1000000 1250000>; - }; - - opp@1200000000,1000,3,2 { - opp-microvolt = <1000000 1000000 1250000>; - }; - - opp@1200000000,1000,3,3 { - opp-microvolt = <1000000 1000000 1250000>; - }; - - opp@1200000000,1000,3,4 { - opp-microvolt = <1000000 1000000 1250000>; - }; - - opp@1200000000,1000,3,7 { - opp-microvolt = <1000000 1000000 1250000>; - }; - - opp@1200000000,1000,4,7 { - opp-microvolt = <1000000 1000000 1250000>; - }; - - opp@1200000000,1000,3,8 { - opp-microvolt = <1000000 1000000 1250000>; - }; - - opp@1200000000,1000,4,8 { - opp-microvolt = <1000000 1000000 1250000>; - }; - opp@1200000000,1025 { opp-microvolt = <1025000 1025000 1250000>; }; - opp@1200000000,1025,2,1 { - opp-microvolt = <1025000 1025000 1250000>; - }; - - opp@1200000000,1025,2,2 { - opp-microvolt = <1025000 1025000 1250000>; - }; - - opp@1200000000,1025,2,3 { - opp-microvolt = <1025000 1025000 1250000>; - }; - - opp@1200000000,1025,2,4 { - opp-microvolt = <1025000 1025000 1250000>; - }; - - opp@1200000000,1025,2,7 { - opp-microvolt = <1025000 1025000 1250000>; - }; - - opp@1200000000,1025,2,8 { - opp-microvolt = <1025000 1025000 1250000>; - }; - opp@1200000000,1050 { opp-microvolt = <1050000 1050000 1250000>; }; @@ -610,90 +206,18 @@ opp-microvolt = <1000000 1000000 1250000>; }; - opp@1300000000,1000,4,7 { - opp-microvolt = <1000000 1000000 1250000>; - }; - - opp@1300000000,1000,4,8 { - opp-microvolt = <1000000 1000000 1250000>; - }; - opp@1300000000,1025 { opp-microvolt = <1025000 1025000 1250000>; }; - opp@1300000000,1025,3,1 { - opp-microvolt = <1025000 1025000 1250000>; - }; - - opp@1300000000,1025,3,7 { - opp-microvolt = <1025000 1025000 1250000>; - }; - - opp@1300000000,1025,3,8 { - opp-microvolt = <1025000 1025000 1250000>; - }; - opp@1300000000,1050 { opp-microvolt = <1050000 1050000 1250000>; }; - opp@1300000000,1050,2,1 { - opp-microvolt = <1050000 1050000 1250000>; - }; - - opp@1300000000,1050,3,2 { - opp-microvolt = <1050000 1050000 1250000>; - }; - - opp@1300000000,1050,3,3 { - opp-microvolt = <1050000 1050000 1250000>; - }; - - opp@1300000000,1050,3,4 { - opp-microvolt = <1050000 1050000 1250000>; - }; - - opp@1300000000,1050,3,5 { - opp-microvolt = <1050000 1050000 1250000>; - }; - - opp@1300000000,1050,3,6 { - opp-microvolt = <1050000 1050000 1250000>; - }; - - opp@1300000000,1050,2,7 { - opp-microvolt = <1050000 1050000 1250000>; - }; - - opp@1300000000,1050,2,8 { - opp-microvolt = <1050000 1050000 1250000>; - }; - - opp@1300000000,1050,3,12 { - opp-microvolt = <1050000 1050000 1250000>; - }; - - opp@1300000000,1050,3,13 { - opp-microvolt = <1050000 1050000 1250000>; - }; - opp@1300000000,1075 { opp-microvolt = <1075000 1075000 1250000>; }; - opp@1300000000,1075,2,2 { - opp-microvolt = <1075000 1075000 1250000>; - }; - - opp@1300000000,1075,2,3 { - opp-microvolt = <1075000 1075000 1250000>; - }; - - opp@1300000000,1075,2,4 { - opp-microvolt = <1075000 1075000 1250000>; - }; - opp@1300000000,1100 { opp-microvolt = <1100000 1100000 1250000>; }; @@ -722,10 +246,6 @@ opp-microvolt = <1150000 1150000 1250000>; }; - opp@1400000000,1150,2,4 { - opp-microvolt = <1150000 1150000 1250000>; - }; - opp@1400000000,1175 { opp-microvolt = <1175000 1175000 1250000>; }; @@ -738,42 +258,10 @@ opp-microvolt = <1125000 1125000 1250000>; }; - opp@1500000000,1125,4,5 { - opp-microvolt = <1125000 1125000 1250000>; - }; - - opp@1500000000,1125,4,6 { - opp-microvolt = <1125000 1125000 1250000>; - }; - - opp@1500000000,1125,4,12 { - opp-microvolt = <1125000 1125000 1250000>; - }; - - opp@1500000000,1125,4,13 { - opp-microvolt = <1125000 1125000 1250000>; - }; - opp@1500000000,1150 { opp-microvolt = <1150000 1150000 1250000>; }; - opp@1500000000,1150,3,5 { - opp-microvolt = <1150000 1150000 1250000>; - }; - - opp@1500000000,1150,3,6 { - opp-microvolt = <1150000 1150000 1250000>; - }; - - opp@1500000000,1150,3,12 { - opp-microvolt = <1150000 1150000 1250000>; - }; - - opp@1500000000,1150,3,13 { - opp-microvolt = <1150000 1150000 1250000>; - }; - opp@1500000000,1200 { opp-microvolt = <1200000 1200000 1250000>; }; diff --git a/arch/arm/boot/dts/tegra30-cpu-opp.dtsi b/arch/arm/boot/dts/tegra30-cpu-opp.dtsi index 8e434f6713cd20..0f7135006d197c 100644 --- a/arch/arm/boot/dts/tegra30-cpu-opp.dtsi +++ b/arch/arm/boot/dts/tegra30-cpu-opp.dtsi @@ -109,667 +109,188 @@ opp@475000000,850 { clock-latency-ns = <100000>; - opp-supported-hw = <0x0F 0x0001>; - opp-hz = /bits/ 64 <475000000>; - }; - - opp@475000000,850,0,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x01 0x0002>; - opp-hz = /bits/ 64 <475000000>; - }; - - opp@475000000,850,0,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x01 0x0010>; - opp-hz = /bits/ 64 <475000000>; - }; - - opp@475000000,850,0,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x01 0x0080>; - opp-hz = /bits/ 64 <475000000>; - }; - - opp@475000000,850,0,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x01 0x0100>; - opp-hz = /bits/ 64 <475000000>; - }; - - opp@608000000,850 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x1F 0x0400>; - opp-hz = /bits/ 64 <608000000>; - }; - - opp@608000000,912 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x1F 0x0200>; - opp-hz = /bits/ 64 <608000000>; - }; - - opp@620000000,850 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x1E 0x306C>; - opp-hz = /bits/ 64 <620000000>; - }; - - opp@640000000,850 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x0F 0x0001>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,1,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0002>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,2,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0002>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,3,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0002>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,1,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0010>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,2,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0010>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,3,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0010>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,1,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0080>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,2,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0080>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,3,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0080>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,4,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0080>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,1,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0100>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,2,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0100>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,3,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0100>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,850,4,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0100>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@640000000,900 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x01 0x0192>; - opp-hz = /bits/ 64 <640000000>; - }; - - opp@760000000,850 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x1E 0x3461>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,850,3,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0002>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,850,3,2 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0004>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,850,3,3 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0008>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,850,3,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0010>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,850,3,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0080>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,850,4,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0080>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,850,3,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0100>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,850,4,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0100>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,850,0,10 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x01 0x0400>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,900 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x01 0x0001>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,900,1,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0002>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,900,2,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0002>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,900,1,2 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0004>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,900,2,2 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0004>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,900,1,3 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0008>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,900,2,3 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0008>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,900,1,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0010>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,900,2,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0010>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,900,1,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0080>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,900,2,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0080>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,900,1,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0100>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,900,2,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0100>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,912 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x1F 0x0200>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@760000000,975 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x01 0x0192>; - opp-hz = /bits/ 64 <760000000>; - }; - - opp@816000000,850 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x1F 0x0400>; - opp-hz = /bits/ 64 <816000000>; - }; - - opp@816000000,912 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x1F 0x0200>; - opp-hz = /bits/ 64 <816000000>; - }; - - opp@860000000,850 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x0C 0x0001>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0001>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,2,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0002>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,3,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0002>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,2,2 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0004>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,3,2 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0004>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,2,3 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0008>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,3,3 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0008>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,2,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0010>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,3,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0010>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,2,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0080>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,3,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0080>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,4,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0080>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,2,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0100>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,3,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0100>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,900,4,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0100>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,975 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x01 0x0001>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,975,1,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0002>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,975,1,2 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0004>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,975,1,3 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0008>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,975,1,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0010>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,975,1,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0080>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,975,1,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0100>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@860000000,1000 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x01 0x0192>; - opp-hz = /bits/ 64 <860000000>; - }; - - opp@910000000,900 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x18 0x3060>; - opp-hz = /bits/ 64 <910000000>; - }; - - opp@1000000000,900 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x0C 0x0001>; - opp-hz = /bits/ 64 <1000000000>; - }; - - opp@1000000000,975 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x03 0x0001>; - opp-hz = /bits/ 64 <1000000000>; - }; - - opp@1000000000,975,2,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0002>; - opp-hz = /bits/ 64 <1000000000>; - }; - - opp@1000000000,975,3,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0002>; - opp-hz = /bits/ 64 <1000000000>; - }; - - opp@1000000000,975,2,2 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0004>; - opp-hz = /bits/ 64 <1000000000>; - }; - - opp@1000000000,975,3,2 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0004>; - opp-hz = /bits/ 64 <1000000000>; - }; - - opp@1000000000,975,2,3 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0008>; - opp-hz = /bits/ 64 <1000000000>; - }; - - opp@1000000000,975,3,3 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0008>; - opp-hz = /bits/ 64 <1000000000>; - }; - - opp@1000000000,975,2,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0010>; - opp-hz = /bits/ 64 <1000000000>; - }; - - opp@1000000000,975,3,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0010>; - opp-hz = /bits/ 64 <1000000000>; - }; - - opp@1000000000,975,2,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0080>; - opp-hz = /bits/ 64 <1000000000>; - }; - - opp@1000000000,975,3,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0080>; - opp-hz = /bits/ 64 <1000000000>; + opp-supported-hw = <0x0F 0x0001>, <0x01 0x0002>, + <0x01 0x0010>, <0x01 0x0080>, + <0x01 0x0100>; + opp-hz = /bits/ 64 <475000000>; }; - opp@1000000000,975,4,7 { + opp@608000000,850 { clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0080>; - opp-hz = /bits/ 64 <1000000000>; + opp-supported-hw = <0x1F 0x0400>; + opp-hz = /bits/ 64 <608000000>; }; - opp@1000000000,975,2,8 { + opp@608000000,912 { clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0100>; - opp-hz = /bits/ 64 <1000000000>; + opp-supported-hw = <0x1F 0x0200>; + opp-hz = /bits/ 64 <608000000>; }; - opp@1000000000,975,3,8 { + opp@620000000,850 { clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0100>; - opp-hz = /bits/ 64 <1000000000>; + opp-supported-hw = <0x1E 0x306C>; + opp-hz = /bits/ 64 <620000000>; }; - opp@1000000000,975,4,8 { + opp@640000000,850 { clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0100>; - opp-hz = /bits/ 64 <1000000000>; + opp-supported-hw = <0x0F 0x0001>, <0x02 0x0002>, + <0x04 0x0002>, <0x08 0x0002>, + <0x02 0x0010>, <0x04 0x0010>, + <0x08 0x0010>, <0x02 0x0080>, + <0x04 0x0080>, <0x08 0x0080>, + <0x10 0x0080>, <0x02 0x0100>, + <0x04 0x0100>, <0x08 0x0100>, + <0x10 0x0100>; + opp-hz = /bits/ 64 <640000000>; }; - opp@1000000000,1000 { + opp@640000000,900 { clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x019E>; - opp-hz = /bits/ 64 <1000000000>; + opp-supported-hw = <0x01 0x0192>; + opp-hz = /bits/ 64 <640000000>; }; - opp@1000000000,1025 { + opp@760000000,850 { clock-latency-ns = <100000>; - opp-supported-hw = <0x01 0x0192>; - opp-hz = /bits/ 64 <1000000000>; + opp-supported-hw = <0x1E 0x3461>, <0x08 0x0002>, + <0x08 0x0004>, <0x08 0x0008>, + <0x08 0x0010>, <0x08 0x0080>, + <0x10 0x0080>, <0x08 0x0100>, + <0x10 0x0100>, <0x01 0x0400>; + opp-hz = /bits/ 64 <760000000>; }; - opp@1100000000,900 { + opp@760000000,900 { clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0001>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x01 0x0001>, <0x02 0x0002>, + <0x04 0x0002>, <0x02 0x0004>, + <0x04 0x0004>, <0x02 0x0008>, + <0x04 0x0008>, <0x02 0x0010>, + <0x04 0x0010>, <0x02 0x0080>, + <0x04 0x0080>, <0x02 0x0100>, + <0x04 0x0100>; + opp-hz = /bits/ 64 <760000000>; }; - opp@1100000000,975 { + opp@760000000,912 { clock-latency-ns = <100000>; - opp-supported-hw = <0x06 0x0001>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x1F 0x0200>; + opp-hz = /bits/ 64 <760000000>; }; - opp@1100000000,975,3,1 { + opp@760000000,975 { clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0002>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x01 0x0192>; + opp-hz = /bits/ 64 <760000000>; }; - opp@1100000000,975,3,2 { + opp@816000000,850 { clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0004>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x1F 0x0400>; + opp-hz = /bits/ 64 <816000000>; }; - opp@1100000000,975,3,3 { + opp@816000000,912 { clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0008>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x1F 0x0200>; + opp-hz = /bits/ 64 <816000000>; }; - opp@1100000000,975,3,4 { + opp@860000000,850 { clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0010>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x0C 0x0001>; + opp-hz = /bits/ 64 <860000000>; }; - opp@1100000000,975,3,7 { + opp@860000000,900 { clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0080>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x02 0x0001>, <0x04 0x0002>, + <0x08 0x0002>, <0x04 0x0004>, + <0x08 0x0004>, <0x04 0x0008>, + <0x08 0x0008>, <0x04 0x0010>, + <0x08 0x0010>, <0x04 0x0080>, + <0x08 0x0080>, <0x10 0x0080>, + <0x04 0x0100>, <0x08 0x0100>, + <0x10 0x0100>; + opp-hz = /bits/ 64 <860000000>; }; - opp@1100000000,975,4,7 { + opp@860000000,975 { clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0080>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x01 0x0001>, <0x02 0x0002>, + <0x02 0x0004>, <0x02 0x0008>, + <0x02 0x0010>, <0x02 0x0080>, + <0x02 0x0100>; + opp-hz = /bits/ 64 <860000000>; }; - opp@1100000000,975,3,8 { + opp@860000000,1000 { clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0100>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x01 0x0192>; + opp-hz = /bits/ 64 <860000000>; }; - opp@1100000000,975,4,8 { + opp@910000000,900 { clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0100>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x18 0x3060>; + opp-hz = /bits/ 64 <910000000>; }; - opp@1100000000,1000 { + opp@1000000000,900 { clock-latency-ns = <100000>; - opp-supported-hw = <0x01 0x0001>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x0C 0x0001>; + opp-hz = /bits/ 64 <1000000000>; }; - opp@1100000000,1000,2,1 { + opp@1000000000,975 { clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0002>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x03 0x0001>, <0x04 0x0002>, + <0x08 0x0002>, <0x04 0x0004>, + <0x08 0x0004>, <0x04 0x0008>, + <0x08 0x0008>, <0x04 0x0010>, + <0x08 0x0010>, <0x04 0x0080>, + <0x08 0x0080>, <0x10 0x0080>, + <0x04 0x0100>, <0x08 0x0100>, + <0x10 0x0100>; + opp-hz = /bits/ 64 <1000000000>; }; - opp@1100000000,1000,2,2 { + opp@1000000000,1000 { clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0004>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x02 0x019E>; + opp-hz = /bits/ 64 <1000000000>; }; - opp@1100000000,1000,2,3 { + opp@1000000000,1025 { clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0008>; - opp-hz = /bits/ 64 <1100000000>; + opp-supported-hw = <0x01 0x0192>; + opp-hz = /bits/ 64 <1000000000>; }; - opp@1100000000,1000,2,4 { + opp@1100000000,900 { clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0010>; + opp-supported-hw = <0x08 0x0001>; opp-hz = /bits/ 64 <1100000000>; }; - opp@1100000000,1000,2,7 { + opp@1100000000,975 { clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0080>; + opp-supported-hw = <0x06 0x0001>, <0x08 0x0002>, + <0x08 0x0004>, <0x08 0x0008>, + <0x08 0x0010>, <0x08 0x0080>, + <0x10 0x0080>, <0x08 0x0100>, + <0x10 0x0100>; opp-hz = /bits/ 64 <1100000000>; }; - opp@1100000000,1000,2,8 { + opp@1100000000,1000 { clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0100>; + opp-supported-hw = <0x01 0x0001>, <0x04 0x0002>, + <0x04 0x0004>, <0x04 0x0008>, + <0x04 0x0010>, <0x04 0x0080>, + <0x04 0x0100>; opp-hz = /bits/ 64 <1100000000>; }; @@ -799,97 +320,20 @@ opp@1200000000,1000 { clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0001>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1000,3,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0002>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1000,3,2 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0004>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1000,3,3 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0008>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1000,3,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0010>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1000,3,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0080>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1000,4,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0080>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1000,3,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0100>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1000,4,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0100>; + opp-supported-hw = <0x04 0x0001>, <0x08 0x0002>, + <0x08 0x0004>, <0x08 0x0008>, + <0x08 0x0010>, <0x08 0x0080>, + <0x10 0x0080>, <0x08 0x0100>, + <0x10 0x0100>; opp-hz = /bits/ 64 <1200000000>; }; opp@1200000000,1025 { clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0001>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1025,2,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0002>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1025,2,2 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0004>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1025,2,3 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0008>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1025,2,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0010>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1025,2,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0080>; - opp-hz = /bits/ 64 <1200000000>; - }; - - opp@1200000000,1025,2,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0100>; + opp-supported-hw = <0x02 0x0001>, <0x04 0x0002>, + <0x04 0x0004>, <0x04 0x0008>, + <0x04 0x0010>, <0x04 0x0080>, + <0x04 0x0100>; opp-hz = /bits/ 64 <1200000000>; }; @@ -913,133 +357,33 @@ opp@1300000000,1000 { clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0001>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1000,4,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0080>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1000,4,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0100>; + opp-supported-hw = <0x08 0x0001>, <0x10 0x0080>, + <0x10 0x0100>; opp-hz = /bits/ 64 <1300000000>; }; opp@1300000000,1025 { clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0001>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1025,3,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0002>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1025,3,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0080>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1025,3,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0100>; + opp-supported-hw = <0x04 0x0001>, <0x08 0x0002>, + <0x08 0x0080>, <0x08 0x0100>; opp-hz = /bits/ 64 <1300000000>; }; opp@1300000000,1050 { clock-latency-ns = <100000>; - opp-supported-hw = <0x12 0x3061>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1050,2,1 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0002>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1050,3,2 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0004>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1050,3,3 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0008>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1050,3,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0010>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1050,3,5 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0020>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1050,3,6 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0040>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1050,2,7 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0080>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1050,2,8 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0100>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1050,3,12 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x1000>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1050,3,13 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x2000>; + opp-supported-hw = <0x12 0x3061>, <0x04 0x0002>, + <0x08 0x0004>, <0x08 0x0008>, + <0x08 0x0010>, <0x08 0x0020>, + <0x08 0x0040>, <0x04 0x0080>, + <0x04 0x0100>, <0x08 0x1000>, + <0x08 0x2000>; opp-hz = /bits/ 64 <1300000000>; }; opp@1300000000,1075 { clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x0182>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1075,2,2 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0004>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1075,2,3 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0008>; - opp-hz = /bits/ 64 <1300000000>; - }; - - opp@1300000000,1075,2,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0010>; + opp-supported-hw = <0x02 0x0182>, <0x04 0x0004>, + <0x04 0x0008>, <0x04 0x0010>; opp-hz = /bits/ 64 <1300000000>; }; @@ -1081,13 +425,7 @@ opp@1400000000,1150 { clock-latency-ns = <100000>; - opp-supported-hw = <0x02 0x000C>; - opp-hz = /bits/ 64 <1400000000>; - }; - - opp@1400000000,1150,2,4 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0010>; + opp-supported-hw = <0x02 0x000C>, <0x04 0x0010>; opp-hz = /bits/ 64 <1400000000>; }; @@ -1105,61 +443,17 @@ opp@1500000000,1125 { clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0010>; - opp-hz = /bits/ 64 <1500000000>; - }; - - opp@1500000000,1125,4,5 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0020>; - opp-hz = /bits/ 64 <1500000000>; - }; - - opp@1500000000,1125,4,6 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x0040>; - opp-hz = /bits/ 64 <1500000000>; - }; - - opp@1500000000,1125,4,12 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x1000>; - opp-hz = /bits/ 64 <1500000000>; - }; - - opp@1500000000,1125,4,13 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x10 0x2000>; + opp-supported-hw = <0x08 0x0010>, <0x10 0x0020>, + <0x10 0x0040>, <0x10 0x1000>, + <0x10 0x2000>; opp-hz = /bits/ 64 <1500000000>; }; opp@1500000000,1150 { clock-latency-ns = <100000>; - opp-supported-hw = <0x04 0x0010>; - opp-hz = /bits/ 64 <1500000000>; - }; - - opp@1500000000,1150,3,5 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0020>; - opp-hz = /bits/ 64 <1500000000>; - }; - - opp@1500000000,1150,3,6 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x0040>; - opp-hz = /bits/ 64 <1500000000>; - }; - - opp@1500000000,1150,3,12 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x1000>; - opp-hz = /bits/ 64 <1500000000>; - }; - - opp@1500000000,1150,3,13 { - clock-latency-ns = <100000>; - opp-supported-hw = <0x08 0x2000>; + opp-supported-hw = <0x04 0x0010>, <0x08 0x0020>, + <0x08 0x0040>, <0x08 0x1000>, + <0x08 0x2000>; opp-hz = /bits/ 64 <1500000000>; }; From cb60e9602cce1593eb1e9cdc8ee562815078a354 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 31 Aug 2020 11:22:37 +0530 Subject: [PATCH 15/65] opp: Prevent memory leak in dev_pm_opp_attach_genpd() If dev_pm_opp_attach_genpd() is called multiple times (once for each CPU sharing the table), then it would result in unwanted behavior like memory leak, attaching the domain multiple times, etc. Handle that by checking and returning earlier if the domains are already attached. Now that dev_pm_opp_detach_genpd() can get called multiple times as well, we need to protect that too. Note that the virtual device pointers aren't returned in this case, as they may become unavailable to some callers during the middle of the operation. Reported-by: Stephan Gerhold Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 000d0fcb468097..e65174725a4d5a 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1960,6 +1960,9 @@ static void _opp_detach_genpd(struct opp_table *opp_table) { int index; + if (!opp_table->genpd_virt_devs) + return; + for (index = 0; index < opp_table->required_opp_count; index++) { if (!opp_table->genpd_virt_devs[index]) continue; @@ -2006,6 +2009,9 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, if (IS_ERR(opp_table)) return opp_table; + if (opp_table->genpd_virt_devs) + return opp_table; + /* * If the genpd's OPP table isn't already initialized, parsing of the * required-opps fail for dev. We should retry this after genpd's OPP From a5663c9b1e31c00e0bdfaf4d92eb51358fc3950f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 11 Sep 2020 14:56:18 +0530 Subject: [PATCH 16/65] opp: Allow opp-level to be set to 0 The DT bindings don't put such a constraint, nor should the kernel. It is perfectly fine for opp-level to be set to 0, if we need to put the performance state votes for a domain for a particular OPP. Reported-by: Stephan Gerhold Tested-by: Stephan Gerhold Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 3 --- drivers/opp/of.c | 20 +++++++------------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index e65174725a4d5a..8d047300d001d4 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -2109,9 +2109,6 @@ int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, int dest_pstate = -EINVAL; int i; - if (!pstate) - return 0; - /* * Normally the src_table will have the "required_opps" property set to * point to one of the OPPs in the dst_table, but in some cases the diff --git a/drivers/opp/of.c b/drivers/opp/of.c index aa829a569825e4..874b5875622022 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -842,7 +842,7 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table, static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) { struct device_node *np; - int ret, count = 0, pstate_count = 0; + int ret, count = 0; struct dev_pm_opp *opp; /* OPP table is already initialized for the device */ @@ -876,20 +876,14 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) goto remove_static_opp; } - list_for_each_entry(opp, &opp_table->opp_list, node) - pstate_count += !!opp->pstate; - - /* Either all or none of the nodes shall have performance state set */ - if (pstate_count && pstate_count != count) { - dev_err(dev, "Not all nodes have performance state set (%d: %d)\n", - count, pstate_count); - ret = -ENOENT; - goto remove_static_opp; + list_for_each_entry(opp, &opp_table->opp_list, node) { + /* Any non-zero performance state would enable the feature */ + if (opp->pstate) { + opp_table->genpd_performance_state = true; + break; + } } - if (pstate_count) - opp_table->genpd_performance_state = true; - return 0; remove_static_opp: From b89c01c960511dcffe3666d89645c95445d00902 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Mon, 24 Aug 2020 15:59:07 +0100 Subject: [PATCH 17/65] cpufreq: tegra186: Fix initial frequency Commit 6cc3d0e9a097 ("cpufreq: tegra186: add CPUFREQ_NEED_INITIAL_FREQ_CHECK flag") fixed CPUFREQ support for Tegra186 but as a consequence the following warnings are now seen on boot ... cpufreq: cpufreq_online: CPU0: Running at unlisted freq: 0 KHz cpufreq: cpufreq_online: CPU0: Unlisted initial frequency changed to: 2035200 KHz cpufreq: cpufreq_online: CPU1: Running at unlisted freq: 0 KHz cpufreq: cpufreq_online: CPU1: Unlisted initial frequency changed to: 2035200 KHz cpufreq: cpufreq_online: CPU2: Running at unlisted freq: 0 KHz cpufreq: cpufreq_online: CPU2: Unlisted initial frequency changed to: 2035200 KHz cpufreq: cpufreq_online: CPU3: Running at unlisted freq: 0 KHz cpufreq: cpufreq_online: CPU3: Unlisted initial frequency changed to: 2035200 KHz cpufreq: cpufreq_online: CPU4: Running at unlisted freq: 0 KHz cpufreq: cpufreq_online: CPU4: Unlisted initial frequency changed to: 2035200 KHz cpufreq: cpufreq_online: CPU5: Running at unlisted freq: 0 KHz cpufreq: cpufreq_online: CPU5: Unlisted initial frequency changed to: 2035200 KHz Fix this by adding a 'get' callback for the Tegra186 CPUFREQ driver to retrieve the current operating frequency for a given CPU. The 'get' callback uses the current 'ndiv' value that is programmed to determine that current operating frequency. Signed-off-by: Jon Hunter [ Viresh: Return 0 on error ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra186-cpufreq.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c index 01e1f58ba42279..4b4079f515596a 100644 --- a/drivers/cpufreq/tegra186-cpufreq.c +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -14,6 +14,7 @@ #define EDVD_CORE_VOLT_FREQ(core) (0x20 + (core) * 0x4) #define EDVD_CORE_VOLT_FREQ_F_SHIFT 0 +#define EDVD_CORE_VOLT_FREQ_F_MASK 0xffff #define EDVD_CORE_VOLT_FREQ_V_SHIFT 16 struct tegra186_cpufreq_cluster_info { @@ -91,10 +92,39 @@ static int tegra186_cpufreq_set_target(struct cpufreq_policy *policy, return 0; } +static unsigned int tegra186_cpufreq_get(unsigned int cpu) +{ + struct cpufreq_frequency_table *tbl; + struct cpufreq_policy *policy; + void __iomem *edvd_reg; + unsigned int i, freq = 0; + u32 ndiv; + + policy = cpufreq_cpu_get(cpu); + if (!policy) + return 0; + + tbl = policy->freq_table; + edvd_reg = policy->driver_data; + ndiv = readl(edvd_reg) & EDVD_CORE_VOLT_FREQ_F_MASK; + + for (i = 0; tbl[i].frequency != CPUFREQ_TABLE_END; i++) { + if ((tbl[i].driver_data & EDVD_CORE_VOLT_FREQ_F_MASK) == ndiv) { + freq = tbl[i].frequency; + break; + } + } + + cpufreq_cpu_put(policy); + + return freq; +} + static struct cpufreq_driver tegra186_cpufreq_driver = { .name = "tegra186", .flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .get = tegra186_cpufreq_get, .verify = cpufreq_generic_frequency_table_verify, .target_index = tegra186_cpufreq_set_target, .init = tegra186_cpufreq_init, From 629238068eb905297d5edc710c41076456ae8338 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 26 Aug 2020 18:00:15 +0200 Subject: [PATCH 18/65] cpufreq: s5pv210: Simplify with dev_err_probe() Common pattern of handling deferred probe can be simplified with dev_err_probe(). Less code and also it prints the error value. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Viresh Kumar --- drivers/cpufreq/s5pv210-cpufreq.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index e84281e2561d22..7dccdb364fcfb3 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -590,6 +590,7 @@ static struct notifier_block s5pv210_cpufreq_reboot_notifier = { static int s5pv210_cpufreq_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct device_node *np; int id, result = 0; @@ -602,21 +603,14 @@ static int s5pv210_cpufreq_probe(struct platform_device *pdev) * cpufreq-dt driver. */ arm_regulator = regulator_get(NULL, "vddarm"); - if (IS_ERR(arm_regulator)) { - if (PTR_ERR(arm_regulator) == -EPROBE_DEFER) - pr_debug("vddarm regulator not ready, defer\n"); - else - pr_err("failed to get regulator vddarm\n"); - return PTR_ERR(arm_regulator); - } + if (IS_ERR(arm_regulator)) + return dev_err_probe(dev, PTR_ERR(arm_regulator), + "failed to get regulator vddarm\n"); int_regulator = regulator_get(NULL, "vddint"); if (IS_ERR(int_regulator)) { - if (PTR_ERR(int_regulator) == -EPROBE_DEFER) - pr_debug("vddint regulator not ready, defer\n"); - else - pr_err("failed to get regulator vddint\n"); - result = PTR_ERR(int_regulator); + result = dev_err_probe(dev, PTR_ERR(int_regulator), + "failed to get regulator vddint\n"); goto err_int_regulator; } From 77c6d5cd93f52e6692fbe570594d4e14ec0b63e8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 26 Aug 2020 18:00:16 +0200 Subject: [PATCH 19/65] cpufreq: s5pv210: Use dev_err instead of pr_err in probe dev_err() allows easily to identify the device printing the message so no need for __func__. Signed-off-by: Krzysztof Kozlowski [ Viresh: Don't remove update to result variable ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/s5pv210-cpufreq.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 7dccdb364fcfb3..bed496cf8d2471 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -616,8 +616,7 @@ static int s5pv210_cpufreq_probe(struct platform_device *pdev) np = of_find_compatible_node(NULL, NULL, "samsung,s5pv210-clock"); if (!np) { - pr_err("%s: failed to find clock controller DT node\n", - __func__); + dev_err(dev, "failed to find clock controller DT node\n"); result = -ENODEV; goto err_clock; } @@ -625,7 +624,7 @@ static int s5pv210_cpufreq_probe(struct platform_device *pdev) clk_base = of_iomap(np, 0); of_node_put(np); if (!clk_base) { - pr_err("%s: failed to map clock registers\n", __func__); + dev_err(dev, "failed to map clock registers\n"); result = -EFAULT; goto err_clock; } @@ -633,8 +632,7 @@ static int s5pv210_cpufreq_probe(struct platform_device *pdev) for_each_compatible_node(np, NULL, "samsung,s5pv210-dmc") { id = of_alias_get_id(np, "dmc"); if (id < 0 || id >= ARRAY_SIZE(dmc_base)) { - pr_err("%s: failed to get alias of dmc node '%pOFn'\n", - __func__, np); + dev_err(dev, "failed to get alias of dmc node '%pOFn'\n", np); of_node_put(np); result = id; goto err_clk_base; @@ -642,8 +640,7 @@ static int s5pv210_cpufreq_probe(struct platform_device *pdev) dmc_base[id] = of_iomap(np, 0); if (!dmc_base[id]) { - pr_err("%s: failed to map dmc%d registers\n", - __func__, id); + dev_err(dev, "failed to map dmc%d registers\n", id); of_node_put(np); result = -EFAULT; goto err_dmc; @@ -652,7 +649,7 @@ static int s5pv210_cpufreq_probe(struct platform_device *pdev) for (id = 0; id < ARRAY_SIZE(dmc_base); ++id) { if (!dmc_base[id]) { - pr_err("%s: failed to find dmc%d node\n", __func__, id); + dev_err(dev, "failed to find dmc%d node\n", id); result = -ENODEV; goto err_dmc; } From 01a163c52039e9426c7d3d3ab16ca261ad622597 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 31 Aug 2020 08:10:11 +0200 Subject: [PATCH 20/65] cpufreq: sti-cpufreq: add stih418 support The STiH418 can be controlled the same way as STiH407 & STiH410 regarding cpufreq. Signed-off-by: Alain Volmat Signed-off-by: Viresh Kumar --- drivers/cpufreq/sti-cpufreq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index a5ad96d29adca4..4ac6fb23792a02 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -141,7 +141,8 @@ static const struct reg_field sti_stih407_dvfs_regfields[DVFS_MAX_REGFIELDS] = { static const struct reg_field *sti_cpufreq_match(void) { if (of_machine_is_compatible("st,stih407") || - of_machine_is_compatible("st,stih410")) + of_machine_is_compatible("st,stih410") || + of_machine_is_compatible("st,stih418")) return sti_stih407_dvfs_regfields; return NULL; @@ -258,7 +259,8 @@ static int sti_cpufreq_init(void) int ret; if ((!of_machine_is_compatible("st,stih407")) && - (!of_machine_is_compatible("st,stih410"))) + (!of_machine_is_compatible("st,stih410")) && + (!of_machine_is_compatible("st,stih418"))) return -ENODEV; ddata.cpu = get_cpu_device(0); From 305accf3b53ae679c1333022c33f95a7e337a471 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 31 Aug 2020 08:10:12 +0200 Subject: [PATCH 21/65] cpufreq: dt-platdev: Blacklist st,stih418 SoC Add st,stih418 SoC in the blacklist since the cpufreq driver for this platform is already registering the driver. Signed-off-by: Alain Volmat Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 7d01df7bfa6cd3..3776d960f405ef 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -137,6 +137,7 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "st,stih407", }, { .compatible = "st,stih410", }, + { .compatible = "st,stih418", }, { .compatible = "sigma,tango4", }, From a0d698d8c21aaa71b2a2fc6c938d658797e31911 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 31 Aug 2020 08:10:13 +0200 Subject: [PATCH 22/65] cpufreq: arm: Kconfig: add CPUFREQ_DT depend for STI CPUFREQ The sti cpufreq driver is relying on the CPUFREQ_DT driver hence add the depends within the Kconfig.arm Signed-off-by: Alain Volmat Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig.arm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index cb72fb507d5740..bf5830eb664ffa 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -283,7 +283,7 @@ config ARM_SPEAR_CPUFREQ config ARM_STI_CPUFREQ tristate "STi CPUFreq support" - depends on SOC_STIH407 + depends on CPUFREQ_DT && SOC_STIH407 help This driver uses the generic OPP framework to match the running platform with a predefined set of suitable values. If not provided From c942d1542f1bc5001216fabce9cb8ffbe515777e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 7 Sep 2020 15:27:16 +0200 Subject: [PATCH 23/65] cpufreq: armada-37xx: Add missing MODULE_DEVICE_TABLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_ARM_ARMADA_37XX_CPUFREQ is tristate option and therefore this cpufreq driver can be compiled as a module. This patch adds missing MODULE_DEVICE_TABLE which generates correct modalias for automatic loading of this cpufreq driver when is compiled as an external module. Reviewed-by: Andrew Lunn Signed-off-by: Pali Rohár Fixes: 92ce45fb875d7 ("cpufreq: Add DVFS support for Armada 37xx") [ Viresh: Added __maybe_unused ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-37xx-cpufreq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index df1c941260d140..b4af4094309b09 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -484,6 +484,12 @@ static int __init armada37xx_cpufreq_driver_init(void) /* late_initcall, to guarantee the driver is loaded after A37xx clock driver */ late_initcall(armada37xx_cpufreq_driver_init); +static const struct of_device_id __maybe_unused armada37xx_cpufreq_of_match[] = { + { .compatible = "marvell,armada-3700-nb-pm" }, + { }, +}; +MODULE_DEVICE_TABLE(of, armada37xx_cpufreq_of_match); + MODULE_AUTHOR("Gregory CLEMENT "); MODULE_DESCRIPTION("Armada 37xx cpufreq driver"); MODULE_LICENSE("GPL"); From bd74e286b354134863ce633747fa4bfe541e440a Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 8 Sep 2020 13:27:12 +0530 Subject: [PATCH 24/65] cpufreq: qcom-hw: Make use of cpufreq driver_data for passing pdev Get rid of global_pdev pointer and make use of cpufreq driver_data for passing the reference of pdev. This aligns with what other cpufreq drivers are doing. Signed-off-by: Manivannan Sadhasivam Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 3fb044b907a834..ccea34f61152f5 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -30,7 +30,6 @@ #define REG_PERF_STATE 0x920 static unsigned long cpu_hw_rate, xo_rate; -static struct platform_device *global_pdev; static bool icc_scaling_enabled; static int qcom_cpufreq_set_bw(struct cpufreq_policy *policy, @@ -240,7 +239,8 @@ static void qcom_get_related_cpus(int index, struct cpumask *m) static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) { - struct device *dev = &global_pdev->dev; + struct platform_device *pdev = cpufreq_get_driver_data(); + struct device *dev = &pdev->dev; struct of_phandle_args args; struct device_node *cpu_np; struct device *cpu_dev; @@ -267,7 +267,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) index = args.args[0]; - res = platform_get_resource(global_pdev, IORESOURCE_MEM, index); + res = platform_get_resource(pdev, IORESOURCE_MEM, index); if (!res) return -ENODEV; @@ -316,11 +316,12 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) { struct device *cpu_dev = get_cpu_device(policy->cpu); void __iomem *base = policy->driver_data - REG_PERF_STATE; + struct platform_device *pdev = cpufreq_get_driver_data(); dev_pm_opp_remove_all_dynamic(cpu_dev); dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); kfree(policy->freq_table); - devm_iounmap(&global_pdev->dev, base); + devm_iounmap(&pdev->dev, base); return 0; } @@ -365,7 +366,7 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) cpu_hw_rate = clk_get_rate(clk) / CLK_HW_DIV; clk_put(clk); - global_pdev = pdev; + cpufreq_qcom_hw_driver.driver_data = pdev; /* Check for optional interconnect paths on CPU0 */ cpu_dev = get_cpu_device(0); From 75319b4600e7b7adfec80ea0a3ccc07f22290c89 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 15 Sep 2020 12:54:19 +0530 Subject: [PATCH 25/65] dt-bindings: cpufreq: cpufreq-qcom-hw: Document Qcom EPSS compatible The hardware block which carries out CPUFreq operations on SM8250 SoC is called EPSS. Hence, document the compatible. Signed-off-by: Manivannan Sadhasivam Reviewed-by: Amit Kucheria Reviewed-by: Bjorn Andersson Acked-by: Rob Herring Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.txt index 33856947c56109..9299028ee7123d 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.txt +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.txt @@ -8,7 +8,7 @@ Properties: - compatible Usage: required Value type: - Definition: must be "qcom,cpufreq-hw". + Definition: must be "qcom,cpufreq-hw" or "qcom,cpufreq-epss". - clocks Usage: required From f17b3e44320b4079e69135c63d1e416b0703a21e Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 15 Sep 2020 12:54:21 +0530 Subject: [PATCH 26/65] cpufreq: qcom-hw: Use devm_platform_ioremap_resource() to simplify code devm_platform_ioremap_resource() is the combination of platform_get_resource() and devm_ioremap_resource(). Hence, use it to simplify the code a bit. Signed-off-by: Manivannan Sadhasivam Reviewed-by: Amit Kucheria Reviewed-by: Bjorn Andersson Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index ccea34f61152f5..8a303783927f26 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -244,7 +244,6 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) struct of_phandle_args args; struct device_node *cpu_np; struct device *cpu_dev; - struct resource *res; void __iomem *base; int ret, index; @@ -267,13 +266,9 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) index = args.args[0]; - res = platform_get_resource(pdev, IORESOURCE_MEM, index); - if (!res) - return -ENODEV; - - base = devm_ioremap(dev, res->start, resource_size(res)); - if (!base) - return -ENOMEM; + base = devm_platform_ioremap_resource(pdev, index); + if (IS_ERR(base)) + return PTR_ERR(base); /* HW should be in enabled state to proceed */ if (!(readl_relaxed(base + REG_ENABLE) & 0x1)) { From dcd1fd724c19fe964e6415cb161ca58ef99f86ef Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 15 Sep 2020 12:54:22 +0530 Subject: [PATCH 27/65] cpufreq: qcom-hw: Use of_device_get_match_data for offsets and row size For preparing the driver to handle further SoC revisions, let's use the of_device_get_match_data() API for getting the device specific offsets and row size instead of defining them globally. Signed-off-by: Manivannan Sadhasivam Reviewed-by: Bjorn Andersson Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 89 ++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 30 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 8a303783927f26..e3c46984a037f5 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -19,15 +19,21 @@ #define LUT_L_VAL GENMASK(7, 0) #define LUT_CORE_COUNT GENMASK(18, 16) #define LUT_VOLT GENMASK(11, 0) -#define LUT_ROW_SIZE 32 #define CLK_HW_DIV 2 #define LUT_TURBO_IND 1 -/* Register offsets */ -#define REG_ENABLE 0x0 -#define REG_FREQ_LUT 0x110 -#define REG_VOLT_LUT 0x114 -#define REG_PERF_STATE 0x920 +struct qcom_cpufreq_soc_data { + u32 reg_enable; + u32 reg_freq_lut; + u32 reg_volt_lut; + u32 reg_perf_state; + u8 lut_row_size; +}; + +struct qcom_cpufreq_data { + void __iomem *base; + const struct qcom_cpufreq_soc_data *soc_data; +}; static unsigned long cpu_hw_rate, xo_rate; static bool icc_scaling_enabled; @@ -76,10 +82,11 @@ static int qcom_cpufreq_update_opp(struct device *cpu_dev, static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy, unsigned int index) { - void __iomem *perf_state_reg = policy->driver_data; + struct qcom_cpufreq_data *data = policy->driver_data; + const struct qcom_cpufreq_soc_data *soc_data = data->soc_data; unsigned long freq = policy->freq_table[index].frequency; - writel_relaxed(index, perf_state_reg); + writel_relaxed(index, data->base + soc_data->reg_perf_state); if (icc_scaling_enabled) qcom_cpufreq_set_bw(policy, freq); @@ -91,7 +98,8 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy, static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) { - void __iomem *perf_state_reg; + struct qcom_cpufreq_data *data; + const struct qcom_cpufreq_soc_data *soc_data; struct cpufreq_policy *policy; unsigned int index; @@ -99,9 +107,10 @@ static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) if (!policy) return 0; - perf_state_reg = policy->driver_data; + data = policy->driver_data; + soc_data = data->soc_data; - index = readl_relaxed(perf_state_reg); + index = readl_relaxed(data->base + soc_data->reg_perf_state); index = min(index, LUT_MAX_ENTRIES - 1); return policy->freq_table[index].frequency; @@ -110,12 +119,13 @@ static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { - void __iomem *perf_state_reg = policy->driver_data; + struct qcom_cpufreq_data *data = policy->driver_data; + const struct qcom_cpufreq_soc_data *soc_data = data->soc_data; unsigned int index; unsigned long freq; index = policy->cached_resolved_idx; - writel_relaxed(index, perf_state_reg); + writel_relaxed(index, data->base + soc_data->reg_perf_state); freq = policy->freq_table[index].frequency; arch_set_freq_scale(policy->related_cpus, freq, @@ -125,8 +135,7 @@ static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, } static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, - struct cpufreq_policy *policy, - void __iomem *base) + struct cpufreq_policy *policy) { u32 data, src, lval, i, core_count, prev_freq = 0, freq; u32 volt; @@ -134,6 +143,8 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, struct dev_pm_opp *opp; unsigned long rate; int ret; + struct qcom_cpufreq_data *drv_data = policy->driver_data; + const struct qcom_cpufreq_soc_data *soc_data = drv_data->soc_data; table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL); if (!table) @@ -160,14 +171,14 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, } for (i = 0; i < LUT_MAX_ENTRIES; i++) { - data = readl_relaxed(base + REG_FREQ_LUT + - i * LUT_ROW_SIZE); + data = readl_relaxed(drv_data->base + soc_data->reg_freq_lut + + i * soc_data->lut_row_size); src = FIELD_GET(LUT_SRC, data); lval = FIELD_GET(LUT_L_VAL, data); core_count = FIELD_GET(LUT_CORE_COUNT, data); - data = readl_relaxed(base + REG_VOLT_LUT + - i * LUT_ROW_SIZE); + data = readl_relaxed(drv_data->base + soc_data->reg_volt_lut + + i * soc_data->lut_row_size); volt = FIELD_GET(LUT_VOLT, data) * 1000; if (src) @@ -237,6 +248,20 @@ static void qcom_get_related_cpus(int index, struct cpumask *m) } } +static const struct qcom_cpufreq_soc_data qcom_soc_data = { + .reg_enable = 0x0, + .reg_freq_lut = 0x110, + .reg_volt_lut = 0x114, + .reg_perf_state = 0x920, + .lut_row_size = 32, +}; + +static const struct of_device_id qcom_cpufreq_hw_match[] = { + { .compatible = "qcom,cpufreq-hw", .data = &qcom_soc_data }, + {} +}; +MODULE_DEVICE_TABLE(of, qcom_cpufreq_hw_match); + static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) { struct platform_device *pdev = cpufreq_get_driver_data(); @@ -245,6 +270,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) struct device_node *cpu_np; struct device *cpu_dev; void __iomem *base; + struct qcom_cpufreq_data *data; int ret, index; cpu_dev = get_cpu_device(policy->cpu); @@ -270,8 +296,17 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) if (IS_ERR(base)) return PTR_ERR(base); + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto error; + } + + data->soc_data = of_device_get_match_data(&pdev->dev); + data->base = base; + /* HW should be in enabled state to proceed */ - if (!(readl_relaxed(base + REG_ENABLE) & 0x1)) { + if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) { dev_err(dev, "Domain-%d cpufreq hardware not enabled\n", index); ret = -ENODEV; goto error; @@ -284,9 +319,9 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) goto error; } - policy->driver_data = base + REG_PERF_STATE; + policy->driver_data = data; - ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy, base); + ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy); if (ret) { dev_err(dev, "Domain-%d failed to read LUT\n", index); goto error; @@ -310,13 +345,13 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) { struct device *cpu_dev = get_cpu_device(policy->cpu); - void __iomem *base = policy->driver_data - REG_PERF_STATE; + struct qcom_cpufreq_data *data = policy->driver_data; struct platform_device *pdev = cpufreq_get_driver_data(); dev_pm_opp_remove_all_dynamic(cpu_dev); dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); kfree(policy->freq_table); - devm_iounmap(&pdev->dev, base); + devm_iounmap(&pdev->dev, data->base); return 0; } @@ -386,12 +421,6 @@ static int qcom_cpufreq_hw_driver_remove(struct platform_device *pdev) return cpufreq_unregister_driver(&cpufreq_qcom_hw_driver); } -static const struct of_device_id qcom_cpufreq_hw_match[] = { - { .compatible = "qcom,cpufreq-hw" }, - {} -}; -MODULE_DEVICE_TABLE(of, qcom_cpufreq_hw_match); - static struct platform_driver qcom_cpufreq_hw_driver = { .probe = qcom_cpufreq_hw_driver_probe, .remove = qcom_cpufreq_hw_driver_remove, From 49b59f4c358cf71cb4e413749cd855478547c2ca Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 15 Sep 2020 12:54:23 +0530 Subject: [PATCH 28/65] cpufreq: qcom-hw: Add cpufreq support for SM8250 SoC SM8250 SoC uses EPSS block for carrying out the cpufreq duties. Hence, add support for it in the driver with relevant dev data. Signed-off-by: Manivannan Sadhasivam Reviewed-by: Amit Kucheria Reviewed-by: Bjorn Andersson Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index e3c46984a037f5..c485be8391726b 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -256,8 +256,17 @@ static const struct qcom_cpufreq_soc_data qcom_soc_data = { .lut_row_size = 32, }; +static const struct qcom_cpufreq_soc_data epss_soc_data = { + .reg_enable = 0x0, + .reg_freq_lut = 0x100, + .reg_volt_lut = 0x200, + .reg_perf_state = 0x320, + .lut_row_size = 4, +}; + static const struct of_device_id qcom_cpufreq_hw_match[] = { { .compatible = "qcom,cpufreq-hw", .data = &qcom_soc_data }, + { .compatible = "qcom,cpufreq-epss", .data = &epss_soc_data }, {} }; MODULE_DEVICE_TABLE(of, qcom_cpufreq_hw_match); From bc9b9c5ab9d8d16157737db539929d57562926e9 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 15 Sep 2020 10:10:54 -0700 Subject: [PATCH 29/65] cpufreq: qcom: Don't add frequencies without an OPP The driver currently adds all frequencies from the hardware LUT to the cpufreq table, regardless of whether the corresponding OPP exists. This prevents devices from disabling certain OPPs through the device tree and can result in CPU frequencies for which the interconnect bandwidth can't be adjusted. Only add frequencies with an OPP entry. Fixes: 55538fbc79e9 ("cpufreq: qcom: Read voltage LUT and populate OPP") Signed-off-by: Matthias Kaehlcke Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index c485be8391726b..17fbb8cf36a176 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -187,10 +187,15 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, freq = cpu_hw_rate / 1000; if (freq != prev_freq && core_count != LUT_TURBO_IND) { - table[i].frequency = freq; - qcom_cpufreq_update_opp(cpu_dev, freq, volt); - dev_dbg(cpu_dev, "index=%d freq=%d, core_count %d\n", i, + if (!qcom_cpufreq_update_opp(cpu_dev, freq, volt)) { + table[i].frequency = freq; + dev_dbg(cpu_dev, "index=%d freq=%d, core_count %d\n", i, freq, core_count); + } else { + dev_warn(cpu_dev, "failed to update OPP for freq=%d\n", freq); + table[i].frequency = CPUFREQ_ENTRY_INVALID; + } + } else if (core_count == LUT_TURBO_IND) { table[i].frequency = CPUFREQ_ENTRY_INVALID; } @@ -207,9 +212,13 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, * as the boost frequency */ if (prev->frequency == CPUFREQ_ENTRY_INVALID) { - prev->frequency = prev_freq; - prev->flags = CPUFREQ_BOOST_FREQ; - qcom_cpufreq_update_opp(cpu_dev, prev_freq, volt); + if (!qcom_cpufreq_update_opp(cpu_dev, prev_freq, volt)) { + prev->frequency = prev_freq; + prev->flags = CPUFREQ_BOOST_FREQ; + } else { + dev_warn(cpu_dev, "failed to update OPP for freq=%d\n", + freq); + } } break; From 0a10d3fe3e5c601031676e81b41fb2977650b4d4 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Tue, 1 Sep 2020 21:55:45 +0100 Subject: [PATCH 30/65] arch_topology: validate input frequencies to arch_set_freq_scale() The current frequency passed to arch_set_freq_scale() could end up being 0, signaling an error in setting a new frequency. Also, if the maximum frequency in 0, this will result in a division by 0 error. Therefore, validate these input values before using them for the setting of the frequency scale factor. Signed-off-by: Ionela Voinescu Acked-by: Viresh Kumar Reviewed-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- drivers/base/arch_topology.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 75f72d68429417..42a08ef693aeba 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -33,6 +33,9 @@ void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, unsigned long scale; int i; + if (WARN_ON_ONCE(!cur_freq || !max_freq)) + return; + /* * If the use of counters for FIE is enabled, just return as we don't * want to update the scale factor with information from CPUFREQ. From 1a0419b0db4647107a9cf4a28b331c343c0378a0 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Tue, 1 Sep 2020 21:55:46 +0100 Subject: [PATCH 31/65] cpufreq: move invariance setter calls in cpufreq core To properly scale its per-entity load-tracking signals, the task scheduler needs to be given a frequency scale factor, i.e. some image of the current frequency the CPU is running at. Currently, this scale can be computed either by using counters (APERF/MPERF on x86, AMU on arm64), or by piggy-backing on the frequency selection done by cpufreq. For the latter, drivers have to explicitly set the scale factor themselves, despite it being purely boiler-plate code: the required information depends entirely on the kind of frequency switch callback implemented by the driver, i.e. either of: target_index(), target(), fast_switch() and setpolicy(). The fitness of those callbacks with regard to driving the Frequency Invariance Engine (FIE) is studied below: target_index() ============== Documentation states that the chosen frequency "must be determined by freq_table[index].frequency". It isn't clear if it *has* to be that frequency, or if it can use that frequency value to do some computation that ultimately leads to a different frequency selection. All drivers go for the former, while the vexpress-spc-cpufreq has an atypical implementation which is handled separately. Therefore, the hook works on the assumption the core can use freq_table[index].frequency. target() ======= This has been flagged as deprecated since: commit 9c0ebcf78fde ("cpufreq: Implement light weight ->target_index() routine") It also doesn't have that many users: gx-suspmod.c:439: .target = cpufreq_gx_target, s3c24xx-cpufreq.c:428: .target = s3c_cpufreq_target, intel_pstate.c:2528: .target = intel_cpufreq_target, cppc_cpufreq.c:401: .target = cppc_cpufreq_set_target, cpufreq-nforce2.c:371: .target = nforce2_target, sh-cpufreq.c:163: .target = sh_cpufreq_target, pcc-cpufreq.c:573: .target = pcc_cpufreq_target, Similarly to the path taken for target_index() calls in the cpufreq core during a frequency change, all of the drivers above will mark the end of a frequency change by a call to cpufreq_freq_transition_end(). Therefore, cpufreq_freq_transition_end() can be used as the location for the arch_set_freq_scale() call to potentially inform the scheduler of the frequency change. This change maintains the previous functionality for the drivers that implement the target_index() callback, while also adding support for the few drivers that implement the deprecated target() callback. fast_switch() ============= This callback *has* to return the frequency that was selected. setpolicy() =========== This callback does not have any designated way of informing what was the end choice. But there are only two drivers using setpolicy(), and none of them have current FIE support: drivers/cpufreq/longrun.c:281: .setpolicy = longrun_set_policy, drivers/cpufreq/intel_pstate.c:2215: .setpolicy = intel_pstate_set_policy, The intel_pstate is known to use counter-driven frequency invariance. Conclusion ========== Given that the significant majority of current FIE enabled drivers use callbacks that lend themselves to triggering the setting of the FIE scale factor in a generic way, move the invariance setter calls to cpufreq core. As a result of setting the frequency scale factor in cpufreq core, after callbacks that lend themselves to trigger it, remove this functionality from the driver side. To be noted that despite marking a successful frequency change, many cpufreq drivers will consider the new frequency as the requested frequency, although this is might not be the one granted by the hardware. Therefore, the call to arch_set_freq_scale() is a "best effort" one, and it is up to the architecture if the new frequency is used in the new frequency scale factor setting (determined by the implementation of arch_set_freq_scale()) or eventually used by the scheduler (determined by the implementation of arch_scale_freq_capacity()). The architecture is in a better position to decide if it has better methods to obtain more accurate information regarding the current frequency and use that information instead (for example, the use of counters). Also, the implementation to arch_set_freq_scale() will now have to handle error conditions (current frequency == 0) in order to prevent the overhead in cpufreq core when the default arch_set_freq_scale() implementation is used. Signed-off-by: Ionela Voinescu Suggested-by: Valentin Schneider Acked-by: Viresh Kumar Acked-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 10 +--------- drivers/cpufreq/cpufreq.c | 12 +++++++++++- drivers/cpufreq/qcom-cpufreq-hw.c | 9 +-------- drivers/cpufreq/scmi-cpufreq.c | 12 ++---------- drivers/cpufreq/scpi-cpufreq.c | 6 +----- drivers/cpufreq/vexpress-spc-cpufreq.c | 12 ++---------- 6 files changed, 18 insertions(+), 43 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 944d7b45afe9a9..9fd4ce774f1295 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -40,16 +40,8 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) { struct private_data *priv = policy->driver_data; unsigned long freq = policy->freq_table[index].frequency; - int ret; - - ret = dev_pm_opp_set_rate(priv->cpu_dev, freq * 1000); - if (!ret) { - arch_set_freq_scale(policy->related_cpus, freq, - policy->cpuinfo.max_freq); - } - - return ret; + return dev_pm_opp_set_rate(priv->cpu_dev, freq * 1000); } /* diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 47aa90f9a7c2e2..4d5fe777184a99 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -446,6 +446,10 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy, cpufreq_notify_post_transition(policy, freqs, transition_failed); + arch_set_freq_scale(policy->related_cpus, + policy->cur, + policy->cpuinfo.max_freq); + policy->transition_ongoing = false; policy->transition_task = NULL; @@ -2056,9 +2060,15 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier); unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { + unsigned int freq; + target_freq = clamp_val(target_freq, policy->min, policy->max); + freq = cpufreq_driver->fast_switch(policy, target_freq); + + arch_set_freq_scale(policy->related_cpus, freq, + policy->cpuinfo.max_freq); - return cpufreq_driver->fast_switch(policy, target_freq); + return freq; } EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch); diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 3fb044b907a834..aabe4306d92f97 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -85,8 +85,6 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy, if (icc_scaling_enabled) qcom_cpufreq_set_bw(policy, freq); - arch_set_freq_scale(policy->related_cpus, freq, - policy->cpuinfo.max_freq); return 0; } @@ -113,16 +111,11 @@ static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, { void __iomem *perf_state_reg = policy->driver_data; unsigned int index; - unsigned long freq; index = policy->cached_resolved_idx; writel_relaxed(index, perf_state_reg); - freq = policy->freq_table[index].frequency; - arch_set_freq_scale(policy->related_cpus, freq, - policy->cpuinfo.max_freq); - - return freq; + return policy->freq_table[index].frequency; } static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index fb42e339037767..6dd1311660b561 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -48,16 +48,11 @@ static unsigned int scmi_cpufreq_get_rate(unsigned int cpu) static int scmi_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) { - int ret; struct scmi_data *priv = policy->driver_data; struct scmi_perf_ops *perf_ops = handle->perf_ops; u64 freq = policy->freq_table[index].frequency; - ret = perf_ops->freq_set(handle, priv->domain_id, freq * 1000, false); - if (!ret) - arch_set_freq_scale(policy->related_cpus, freq, - policy->cpuinfo.max_freq); - return ret; + return perf_ops->freq_set(handle, priv->domain_id, freq * 1000, false); } static unsigned int scmi_cpufreq_fast_switch(struct cpufreq_policy *policy, @@ -67,11 +62,8 @@ static unsigned int scmi_cpufreq_fast_switch(struct cpufreq_policy *policy, struct scmi_perf_ops *perf_ops = handle->perf_ops; if (!perf_ops->freq_set(handle, priv->domain_id, - target_freq * 1000, true)) { - arch_set_freq_scale(policy->related_cpus, target_freq, - policy->cpuinfo.max_freq); + target_freq * 1000, true)) return target_freq; - } return 0; } diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index b0f5388b88544f..43db05b949d956 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -47,9 +47,8 @@ static unsigned int scpi_cpufreq_get_rate(unsigned int cpu) static int scpi_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) { - unsigned long freq = policy->freq_table[index].frequency; + u64 rate = policy->freq_table[index].frequency * 1000; struct scpi_data *priv = policy->driver_data; - u64 rate = freq * 1000; int ret; ret = clk_set_rate(priv->clk, rate); @@ -60,9 +59,6 @@ scpi_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) if (clk_get_rate(priv->clk) != rate) return -EIO; - arch_set_freq_scale(policy->related_cpus, freq, - policy->cpuinfo.max_freq); - return 0; } diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index 4e8b1dee7c9aa2..e89b905754d211 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -182,7 +182,6 @@ static int ve_spc_cpufreq_set_target(struct cpufreq_policy *policy, { u32 cpu = policy->cpu, cur_cluster, new_cluster, actual_cluster; unsigned int freqs_new; - int ret; cur_cluster = cpu_to_cluster(cpu); new_cluster = actual_cluster = per_cpu(physical_cluster, cpu); @@ -197,15 +196,8 @@ static int ve_spc_cpufreq_set_target(struct cpufreq_policy *policy, new_cluster = A15_CLUSTER; } - ret = ve_spc_cpufreq_set_rate(cpu, actual_cluster, new_cluster, - freqs_new); - - if (!ret) { - arch_set_freq_scale(policy->related_cpus, freqs_new, - policy->cpuinfo.max_freq); - } - - return ret; + return ve_spc_cpufreq_set_rate(cpu, actual_cluster, new_cluster, + freqs_new); } static inline u32 get_table_count(struct cpufreq_frequency_table *table) From 874f635310648a5adcedbd7e02ea0555cfa1da56 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Tue, 1 Sep 2020 21:55:47 +0100 Subject: [PATCH 32/65] cpufreq: report whether cpufreq supports Frequency Invariance (FI) Now that the update of the FI scale factor is done in cpufreq core for selected functions - target(), target_index() and fast_switch(), we can provide feedback to the task scheduler and architecture code on whether cpufreq supports FI. For this purpose provide an external function to expose whether the cpufreq drivers support FI, by using a static key. The logic behind the enablement of cpufreq-based invariance is as follows: - cpufreq-based invariance is disabled by default - cpufreq-based invariance is enabled if any of the callbacks above is implemented while the unsupported setpolicy() is not The cpufreq_supports_freq_invariance() function only returns whether cpufreq is instrumented with the arch_set_freq_scale() calls that result in support for frequency invariance. Due to the lack of knowledge on whether the implementation of arch_set_freq_scale() actually results in the setting of a scale factor based on cpufreq information, it is up to the architecture code to ensure the setting and provision of the scale factor to the scheduler. Signed-off-by: Ionela Voinescu Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 16 ++++++++++++++++ include/linux/cpufreq.h | 5 +++++ 2 files changed, 21 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 4d5fe777184a99..570bf2ebe9d4b3 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -61,6 +61,12 @@ static struct cpufreq_driver *cpufreq_driver; static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); static DEFINE_RWLOCK(cpufreq_driver_lock); +static DEFINE_STATIC_KEY_FALSE(cpufreq_freq_invariance); +bool cpufreq_supports_freq_invariance(void) +{ + return static_branch_likely(&cpufreq_freq_invariance); +} + /* Flag to suspend/resume CPUFreq governors */ static bool cpufreq_suspended; @@ -2720,6 +2726,15 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) cpufreq_driver = driver_data; write_unlock_irqrestore(&cpufreq_driver_lock, flags); + /* + * Mark support for the scheduler's frequency invariance engine for + * drivers that implement target(), target_index() or fast_switch(). + */ + if (!cpufreq_driver->setpolicy) { + static_branch_enable_cpuslocked(&cpufreq_freq_invariance); + pr_debug("supports frequency invariance"); + } + if (driver_data->setpolicy) driver_data->flags |= CPUFREQ_CONST_LOOPS; @@ -2789,6 +2804,7 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) cpus_read_lock(); subsys_interface_unregister(&cpufreq_interface); remove_boost_sysfs_file(); + static_branch_disable_cpuslocked(&cpufreq_freq_invariance); cpuhp_remove_state_nocalls_cpuslocked(hp_online); write_lock_irqsave(&cpufreq_driver_lock, flags); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a911e5d0684549..e54767e2a68a65 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -217,6 +217,7 @@ void refresh_frequency_limits(struct cpufreq_policy *policy); void cpufreq_update_policy(unsigned int cpu); void cpufreq_update_limits(unsigned int cpu); bool have_governor_per_policy(void); +bool cpufreq_supports_freq_invariance(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); void cpufreq_disable_fast_switch(struct cpufreq_policy *policy); @@ -237,6 +238,10 @@ static inline unsigned int cpufreq_get_hw_max_freq(unsigned int cpu) { return 0; } +static inline bool cpufreq_supports_freq_invariance(void) +{ + return false; +} static inline void disable_cpufreq(void) { } #endif From ecddc3a0d5d752071c627aa1a1d4d7b529ddae67 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 1 Sep 2020 21:55:48 +0100 Subject: [PATCH 33/65] arch_topology, cpufreq: constify arch_* cpumasks The passed cpumask arguments to arch_set_freq_scale() and arch_freq_counters_available() are only iterated over, so reflect this in the prototype. This also allows to pass system cpumasks like cpu_online_mask without getting a warning. Signed-off-by: Valentin Schneider Signed-off-by: Ionela Voinescu Acked-by: Catalin Marinas Acked-by: Viresh Kumar Reviewed-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- arch/arm64/kernel/topology.c | 2 +- drivers/base/arch_topology.c | 4 ++-- drivers/cpufreq/cpufreq.c | 5 +++-- include/linux/arch_topology.h | 2 +- include/linux/cpufreq.h | 3 ++- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 0801a0f3c156af..9a9f2b8dedf5b6 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -253,7 +253,7 @@ static int __init init_amu_fie(void) } late_initcall_sync(init_amu_fie); -bool arch_freq_counters_available(struct cpumask *cpus) +bool arch_freq_counters_available(const struct cpumask *cpus) { return amu_freq_invariant() && cpumask_subset(cpus, amu_fie_cpus); diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 42a08ef693aeba..91de5331ac8a5b 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -21,13 +21,13 @@ #include #include -__weak bool arch_freq_counters_available(struct cpumask *cpus) +__weak bool arch_freq_counters_available(const struct cpumask *cpus) { return false; } DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; -void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, +void arch_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, unsigned long max_freq) { unsigned long scale; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 570bf2ebe9d4b3..2ea245a6c0c074 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -160,8 +160,9 @@ u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy) } EXPORT_SYMBOL_GPL(get_cpu_idle_time); -__weak void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, - unsigned long max_freq) +__weak void arch_set_freq_scale(const struct cpumask *cpus, + unsigned long cur_freq, + unsigned long max_freq) { } EXPORT_SYMBOL_GPL(arch_set_freq_scale); diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 69b1dabe39dc33..810c8333625779 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -30,7 +30,7 @@ static inline unsigned long topology_get_freq_scale(int cpu) return per_cpu(freq_scale, cpu); } -bool arch_freq_counters_available(struct cpumask *cpus); +bool arch_freq_counters_available(const struct cpumask *cpus); DECLARE_PER_CPU(unsigned long, thermal_pressure); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index e54767e2a68a65..9f779fbdbe7bb0 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1011,7 +1011,8 @@ static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy, extern void arch_freq_prepare_all(void); extern unsigned int arch_freq_get_on_cpu(int cpu); -extern void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, +extern void arch_set_freq_scale(const struct cpumask *cpus, + unsigned long cur_freq, unsigned long max_freq); /* the following are really really optional */ From 15e5d5b45b2b7072214af519357a1c0af078c50b Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 1 Sep 2020 21:55:49 +0100 Subject: [PATCH 34/65] arch_topology, arm, arm64: define arch_scale_freq_invariant() arch_scale_freq_invariant() is used by schedutil to determine whether the scheduler's load-tracking signals are frequency invariant. Its definition is overridable, though by default it is hardcoded to 'true' if arch_scale_freq_capacity() is defined ('false' otherwise). This behaviour is not overridden on arm, arm64 and other users of the generic arch topology driver, which is somewhat precarious: arch_scale_freq_capacity() will always be defined, yet not all cpufreq drivers are guaranteed to drive the frequency invariance scale factor setting. In other words, the load-tracking signals may very well *not* be frequency invariant. Now that cpufreq can be queried on whether the current driver is driving the Frequency Invariance (FI) scale setting, the current situation can be improved. This combines the query of whether cpufreq supports the setting of the frequency scale factor, with whether all online CPUs are counter-based FI enabled. While cpufreq FI enablement applies at system level, for all CPUs, counter-based FI support could also be used for only a subset of CPUs to set the invariance scale factor. Therefore, if cpufreq-based FI support is present, we consider the system to be invariant. If missing, we require all online CPUs to be counter-based FI enabled in order for the full system to be considered invariant. If the system ends up not being invariant, a new condition is needed in the counter initialization code that disables all scale factor setting based on counters. Precedence of counters over cpufreq use is not important here. The invariant status is only given to the system if all CPUs have at least one method of setting the frequency scale factor. Signed-off-by: Valentin Schneider Signed-off-by: Ionela Voinescu Acked-by: Catalin Marinas Acked-by: Viresh Kumar Reviewed-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- arch/arm/include/asm/topology.h | 1 + arch/arm64/include/asm/topology.h | 1 + arch/arm64/kernel/topology.c | 7 +++++++ drivers/base/arch_topology.c | 6 ++++++ include/linux/arch_topology.h | 2 ++ 5 files changed, 17 insertions(+) diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index e0593cf095d05a..9219e67befbe5d 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -9,6 +9,7 @@ /* Replace task scheduler's default frequency-invariant accounting */ #define arch_scale_freq_capacity topology_get_freq_scale +#define arch_scale_freq_invariant topology_scale_freq_invariant /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index e042f6527981e8..7cb519473fbd8a 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -27,6 +27,7 @@ void topology_scale_freq_tick(void); /* Replace task scheduler's default frequency-invariant accounting */ #define arch_scale_freq_capacity topology_get_freq_scale +#define arch_scale_freq_invariant topology_scale_freq_invariant /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 9a9f2b8dedf5b6..4064d39bb66d8e 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -246,6 +246,13 @@ static int __init init_amu_fie(void) static_branch_enable(&amu_fie_key); } + /* + * If the system is not fully invariant after AMU init, disable + * partial use of counters for frequency invariance. + */ + if (!topology_scale_freq_invariant()) + static_branch_disable(&amu_fie_key); + free_valid_mask: free_cpumask_var(valid_cpus); diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 91de5331ac8a5b..89cae168b076bb 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -21,6 +21,12 @@ #include #include +bool topology_scale_freq_invariant(void) +{ + return cpufreq_supports_freq_invariance() || + arch_freq_counters_available(cpu_online_mask); +} + __weak bool arch_freq_counters_available(const struct cpumask *cpus) { return false; diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 810c8333625779..083df331a3c9f4 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -30,6 +30,8 @@ static inline unsigned long topology_get_freq_scale(int cpu) return per_cpu(freq_scale, cpu); } +bool topology_scale_freq_invariant(void); + bool arch_freq_counters_available(const struct cpumask *cpus); DECLARE_PER_CPU(unsigned long, thermal_pressure); From 1170433e6611402b869c583fa1fbfd85106ff066 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 9 Jul 2020 20:35:32 +0300 Subject: [PATCH 35/65] cpuidle: tegra: Correctly handle result of arm_cpuidle_simple_enter() The enter() callback of CPUIDLE drivers returns index of the entered idle state on success or a negative value on failure. The negative value could any negative value, i.e. it doesn't necessarily needs to be a error code. That's because CPUIDLE core only cares about the fact of failure and not about the reason of the enter() failure. Like every other enter() callback, the arm_cpuidle_simple_enter() returns the entered idle-index on success. Unlike some of other drivers, it never fails. It happened that TEGRA_C1=index=err=0 in the code of cpuidle-tegra driver, and thus, there is no problem for the cpuidle-tegra driver created by the typo in the code which assumes that the arm_cpuidle_simple_enter() returns a error code. The arm_cpuidle_simple_enter() also may return a -ENODEV error if CPU_IDLE is disabled in a kernel's config, but all CPUIDLE drivers are disabled if CPU_IDLE is disabled, including the cpuidle-tegra driver. So we can't ever see the error code from arm_cpuidle_simple_enter() today. Of course the code may get some changes in the future and then the typo may transform into a real bug, so let's correct the typo! The tegra_cpuidle_state_enter() is now changed to make it return the entered idle-index on success and negative error code on fail, which puts it on par with the arm_cpuidle_simple_enter(), making code consistent in regards to the error handling. This patch fixes a minor typo in the code, it doesn't fix any bugs. Signed-off-by: Dmitry Osipenko Reviewed-by: Jon Hunter Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-tegra.c | 34 +++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/cpuidle/cpuidle-tegra.c b/drivers/cpuidle/cpuidle-tegra.c index a12fb141875a79..e8956706a2917a 100644 --- a/drivers/cpuidle/cpuidle-tegra.c +++ b/drivers/cpuidle/cpuidle-tegra.c @@ -172,7 +172,7 @@ static int tegra_cpuidle_coupled_barrier(struct cpuidle_device *dev) static int tegra_cpuidle_state_enter(struct cpuidle_device *dev, int index, unsigned int cpu) { - int ret; + int err; /* * CC6 state is the "CPU cluster power-off" state. In order to @@ -183,9 +183,9 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev, * CPU cores, GIC and L2 cache). */ if (index == TEGRA_CC6) { - ret = tegra_cpuidle_coupled_barrier(dev); - if (ret) - return ret; + err = tegra_cpuidle_coupled_barrier(dev); + if (err) + return err; } local_fiq_disable(); @@ -194,15 +194,15 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev, switch (index) { case TEGRA_C7: - ret = tegra_cpuidle_c7_enter(); + err = tegra_cpuidle_c7_enter(); break; case TEGRA_CC6: - ret = tegra_cpuidle_cc6_enter(cpu); + err = tegra_cpuidle_cc6_enter(cpu); break; default: - ret = -EINVAL; + err = -EINVAL; break; } @@ -210,7 +210,7 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev, tegra_pm_clear_cpu_in_lp2(); local_fiq_enable(); - return ret; + return err ?: index; } static int tegra_cpuidle_adjust_state_index(int index, unsigned int cpu) @@ -236,21 +236,27 @@ static int tegra_cpuidle_enter(struct cpuidle_device *dev, int index) { unsigned int cpu = cpu_logical_map(dev->cpu); - int err; + int ret; index = tegra_cpuidle_adjust_state_index(index, cpu); if (dev->states_usage[index].disable) return -1; if (index == TEGRA_C1) - err = arm_cpuidle_simple_enter(dev, drv, index); + ret = arm_cpuidle_simple_enter(dev, drv, index); else - err = tegra_cpuidle_state_enter(dev, index, cpu); + ret = tegra_cpuidle_state_enter(dev, index, cpu); - if (err && (err != -EINTR || index != TEGRA_CC6)) - pr_err_once("failed to enter state %d err: %d\n", index, err); + if (ret < 0) { + if (ret != -EINTR || index != TEGRA_CC6) + pr_err_once("failed to enter state %d err: %d\n", + index, ret); + index = -1; + } else { + index = ret; + } - return err ? -1 : index; + return index; } static int tegra114_enter_s2idle(struct cpuidle_device *dev, From 653f68b6ecd15cf147817bb01c2b7d02f4bffc8e Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 19 Aug 2020 11:23:54 +0800 Subject: [PATCH 36/65] ACPI: processor: Print more information when acpi_processor_evaluate_cst() fails Some platforms have bogus _CST which might cause unexpectd behavior in the CPU idle driver. Some bogus _CST might be unable to be disassembled by acpica-tools due to broken format. Print extra log if the _CST extraction/verification failes. This can be used to help narrow down why the CPU idle driver fails to behave as expected. Suggested-by: Zhang Rui Signed-off-by: Chen Yu [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_processor.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index b51ddf3bb61679..412a9725cc1eb7 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -798,22 +798,34 @@ int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, memset(&cx, 0, sizeof(cx)); element = &cst->package.elements[i]; - if (element->type != ACPI_TYPE_PACKAGE) + if (element->type != ACPI_TYPE_PACKAGE) { + acpi_handle_info(handle, "_CST C%d type(%x) is not package, skip...\n", + i, element->type); continue; + } - if (element->package.count != 4) + if (element->package.count != 4) { + acpi_handle_info(handle, "_CST C%d package count(%d) is not 4, skip...\n", + i, element->package.count); continue; + } obj = &element->package.elements[0]; - if (obj->type != ACPI_TYPE_BUFFER) + if (obj->type != ACPI_TYPE_BUFFER) { + acpi_handle_info(handle, "_CST C%d package element[0] type(%x) is not buffer, skip...\n", + i, obj->type); continue; + } reg = (struct acpi_power_register *)obj->buffer.pointer; obj = &element->package.elements[1]; - if (obj->type != ACPI_TYPE_INTEGER) + if (obj->type != ACPI_TYPE_INTEGER) { + acpi_handle_info(handle, "_CST C[%d] package element[1] type(%x) is not integer, skip...\n", + i, obj->type); continue; + } cx.type = obj->integer.value; /* @@ -850,6 +862,8 @@ int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, cx.entry_method = ACPI_CSTATE_HALT; snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); } else { + acpi_handle_info(handle, "_CST C%d declares FIXED_HARDWARE C-state but not supported in hardware, skip...\n", + i); continue; } } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { @@ -857,6 +871,8 @@ int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x", cx.address); } else { + acpi_handle_info(handle, "_CST C%d space_id(%x) neither FIXED_HARDWARE nor SYSTEM_IO, skip...\n", + i, reg->space_id); continue; } @@ -864,14 +880,20 @@ int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, cx.valid = 1; obj = &element->package.elements[2]; - if (obj->type != ACPI_TYPE_INTEGER) + if (obj->type != ACPI_TYPE_INTEGER) { + acpi_handle_info(handle, "_CST C%d package element[2] type(%x) not integer, skip...\n", + i, obj->type); continue; + } cx.latency = obj->integer.value; obj = &element->package.elements[3]; - if (obj->type != ACPI_TYPE_INTEGER) + if (obj->type != ACPI_TYPE_INTEGER) { + acpi_handle_info(handle, "_CST C%d package element[3] type(%x) not integer, skip...\n", + i, obj->type); continue; + } memcpy(&info->states[++last_index], &cx, sizeof(cx)); } From 10942019040c5557556ec22aae0f771b2a1a1a6d Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 1 Sep 2020 16:28:58 +0200 Subject: [PATCH 37/65] firmware: psci: Extend psci_set_osi_mode() to allow reset to PC mode The current user (cpuidle-psci) of psci_set_osi_mode() only needs to enable the PSCI OSI mode. Although, as subsequent changes shows, there is a need to be able to reset back into the PSCI PC mode. Therefore, let's extend psci_set_osi_mode() to take a bool as in-parameter, to let the user indicate whether to enable OSI or to switch back to PC mode. Reviewed-by: Sudeep Holla Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-psci-domain.c | 2 +- drivers/firmware/psci/psci.c | 12 +++++++----- include/linux/psci.h | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c index b6e9649ab0da3f..b6ab0415f450af 100644 --- a/drivers/cpuidle/cpuidle-psci-domain.c +++ b/drivers/cpuidle/cpuidle-psci-domain.c @@ -278,7 +278,7 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev) goto remove_pd; /* Try to enable OSI mode. */ - ret = psci_set_osi_mode(); + ret = psci_set_osi_mode(true); if (ret) { pr_warn("failed to enable OSI mode: %d\n", ret); psci_pd_remove_topology(np); diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index 92013ecc2d9ed7..00af99b6f97c15 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -151,12 +151,15 @@ static u32 psci_get_version(void) return invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); } -int psci_set_osi_mode(void) +int psci_set_osi_mode(bool enable) { + unsigned long suspend_mode; int err; - err = invoke_psci_fn(PSCI_1_0_FN_SET_SUSPEND_MODE, - PSCI_1_0_SUSPEND_MODE_OSI, 0, 0); + suspend_mode = enable ? PSCI_1_0_SUSPEND_MODE_OSI : + PSCI_1_0_SUSPEND_MODE_PC; + + err = invoke_psci_fn(PSCI_1_0_FN_SET_SUSPEND_MODE, suspend_mode, 0, 0); return psci_to_linux_errno(err); } @@ -546,8 +549,7 @@ static int __init psci_1_0_init(struct device_node *np) pr_info("OSI mode supported.\n"); /* Default to PC mode. */ - invoke_psci_fn(PSCI_1_0_FN_SET_SUSPEND_MODE, - PSCI_1_0_SUSPEND_MODE_PC, 0, 0); + psci_set_osi_mode(false); } return 0; diff --git a/include/linux/psci.h b/include/linux/psci.h index 14ad9b9ebcd66e..2a1bfb890e5887 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -18,7 +18,7 @@ bool psci_tos_resident_on(int cpu); int psci_cpu_suspend_enter(u32 state); bool psci_power_state_is_valid(u32 state); -int psci_set_osi_mode(void); +int psci_set_osi_mode(bool enable); bool psci_has_osi_support(void); struct psci_operations { From 70c179b49870929ca183421935415622d30875b5 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 1 Sep 2020 16:28:59 +0200 Subject: [PATCH 38/65] cpuidle: psci: Allow PM domain to be initialized even if no OSI mode If the PSCI OSI mode isn't supported or fails to be enabled, the PM domain topology with the genpd providers isn't initialized. This is perfectly fine from cpuidle-psci point of view. However, since the PM domain topology in the DTS files is a description of the HW, no matter of whether the PSCI OSI mode is supported or not, other consumers besides the CPUs may rely on it. Therefore, let's always allow the initialization of the PM domain topology to succeed, independently of whether the PSCI OSI mode is supported. Consequentially we need to track if we succeed to enable the OSI mode, as to know when a domain idlestate can be selected. Note that, CPU devices are still not being attached to the PM domain topology, unless the PSCI OSI mode is supported. Acked-by: Sudeep Holla Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-psci-domain.c | 59 ++++++++++++++------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c index b6ab0415f450af..4a031c62f92a12 100644 --- a/drivers/cpuidle/cpuidle-psci-domain.c +++ b/drivers/cpuidle/cpuidle-psci-domain.c @@ -105,7 +105,7 @@ static void psci_pd_free_states(struct genpd_power_state *states, kfree(states); } -static int psci_pd_init(struct device_node *np) +static int psci_pd_init(struct device_node *np, bool use_osi) { struct generic_pm_domain *pd; struct psci_pd_provider *pd_provider; @@ -135,11 +135,16 @@ static int psci_pd_init(struct device_node *np) pd->free_states = psci_pd_free_states; pd->name = kbasename(pd->name); - pd->power_off = psci_pd_power_off; pd->states = states; pd->state_count = state_count; pd->flags |= GENPD_FLAG_IRQ_SAFE | GENPD_FLAG_CPU_DOMAIN; + /* Allow power off when OSI has been successfully enabled. */ + if (use_osi) + pd->power_off = psci_pd_power_off; + else + pd->flags |= GENPD_FLAG_ALWAYS_ON; + /* Use governor for CPU PM domains if it has some states to manage. */ pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL; @@ -190,7 +195,7 @@ static void psci_pd_remove(void) } } -static int psci_pd_init_topology(struct device_node *np, bool add) +static int psci_pd_init_topology(struct device_node *np) { struct device_node *node; struct of_phandle_args child, parent; @@ -203,9 +208,7 @@ static int psci_pd_init_topology(struct device_node *np, bool add) child.np = node; child.args_count = 0; - - ret = add ? of_genpd_add_subdomain(&parent, &child) : - of_genpd_remove_subdomain(&parent, &child); + ret = of_genpd_add_subdomain(&parent, &child); of_node_put(parent.np); if (ret) { of_node_put(node); @@ -216,14 +219,20 @@ static int psci_pd_init_topology(struct device_node *np, bool add) return 0; } -static int psci_pd_add_topology(struct device_node *np) +static bool psci_pd_try_set_osi_mode(void) { - return psci_pd_init_topology(np, true); -} + int ret; -static void psci_pd_remove_topology(struct device_node *np) -{ - psci_pd_init_topology(np, false); + if (!psci_has_osi_support()) + return false; + + ret = psci_set_osi_mode(true); + if (ret) { + pr_warn("failed to enable OSI mode: %d\n", ret); + return false; + } + + return true; } static void psci_cpuidle_domain_sync_state(struct device *dev) @@ -244,14 +253,14 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; struct device_node *node; + bool use_osi; int ret = 0, pd_count = 0; if (!np) return -ENODEV; - /* Currently limit the hierarchical topology to be used in OSI mode. */ - if (!psci_has_osi_support()) - return 0; + /* If OSI mode is supported, let's try to enable it. */ + use_osi = psci_pd_try_set_osi_mode(); /* * Parse child nodes for the "#power-domain-cells" property and @@ -261,7 +270,7 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev) if (!of_find_property(node, "#power-domain-cells", NULL)) continue; - ret = psci_pd_init(node); + ret = psci_pd_init(node, use_osi); if (ret) goto put_node; @@ -270,30 +279,24 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev) /* Bail out if not using the hierarchical CPU topology. */ if (!pd_count) - return 0; + goto no_pd; /* Link genpd masters/subdomains to model the CPU topology. */ - ret = psci_pd_add_topology(np); + ret = psci_pd_init_topology(np); if (ret) goto remove_pd; - /* Try to enable OSI mode. */ - ret = psci_set_osi_mode(true); - if (ret) { - pr_warn("failed to enable OSI mode: %d\n", ret); - psci_pd_remove_topology(np); - goto remove_pd; - } - pr_info("Initialized CPU PM domain topology\n"); return 0; put_node: of_node_put(node); remove_pd: - if (pd_count) - psci_pd_remove(); + psci_pd_remove(); pr_err("failed to create CPU PM domains ret=%d\n", ret); +no_pd: + if (use_osi) + psci_set_osi_mode(false); return ret; } From 3ffe2e7318ba705b8e0060d4696cafc75170e819 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Tue, 15 Sep 2020 11:26:29 +0800 Subject: [PATCH 39/65] PM: AVS: qcom-cpr: simplify the return expression of cpr_disable() Simplify the return expression. Signed-off-by: Liu Shixin Reviewed-by: Bjorn Andersson Acked-by: Kevin Hilman [ rjw: Minor subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/power/avs/qcom-cpr.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/power/avs/qcom-cpr.c b/drivers/power/avs/qcom-cpr.c index bd7c3e48b38600..b24cc77d1889fb 100644 --- a/drivers/power/avs/qcom-cpr.c +++ b/drivers/power/avs/qcom-cpr.c @@ -665,8 +665,6 @@ static int cpr_enable(struct cpr_drv *drv) static int cpr_disable(struct cpr_drv *drv) { - int ret; - mutex_lock(&drv->lock); if (cpr_is_allowed(drv)) { @@ -676,11 +674,7 @@ static int cpr_disable(struct cpr_drv *drv) mutex_unlock(&drv->lock); - ret = regulator_disable(drv->vdd_apc); - if (ret) - return ret; - - return 0; + return regulator_disable(drv->vdd_apc); } static int cpr_config(struct cpr_drv *drv) From bf23e1cd46d6a3277aad5921e14eac40bf32b90a Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 18 Sep 2020 13:16:33 -0700 Subject: [PATCH 40/65] MAINTAINERS: drop myself from PM AVS drivers I haven't had the time or the expertise to adequately review and maintain these drivers for awhile, so make it official. Signed-off-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d746519253c3a3..4f1a56f6efaa70 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5380,7 +5380,6 @@ F: include/linux/kobj* F: lib/kobj* DRIVERS FOR ADAPTIVE VOLTAGE SCALING (AVS) -M: Kevin Hilman M: Nishanth Menon L: linux-pm@vger.kernel.org S: Maintained From f49735f4978f479b0de4f50ab217d5a56bc83c55 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Tue, 22 Sep 2020 12:34:16 -0600 Subject: [PATCH 41/65] cpuidle: record state entry rejection statistics CPUs may fail to enter the chosen idle state if there was a pending interrupt, causing the cpuidle driver to return an error value. Record that and export it via sysfs along with the other idle state statistics. This could prove useful in understanding behavior of the governor and the system during usecases that involve multiple CPUs. Signed-off-by: Lina Iyer [ rjw: Changelog and documentation edits ] Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/cpuidle.rst | 9 +++++++++ drivers/cpuidle/cpuidle.c | 1 + drivers/cpuidle/sysfs.c | 3 +++ include/linux/cpuidle.h | 1 + 4 files changed, 14 insertions(+) diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst index a96a423e377918..830868e526f482 100644 --- a/Documentation/admin-guide/pm/cpuidle.rst +++ b/Documentation/admin-guide/pm/cpuidle.rst @@ -528,6 +528,10 @@ object corresponding to it, as follows: Total number of times the hardware has been asked by the given CPU to enter this idle state. +``rejected`` + Total number of times a request to enter this idle state on the given + CPU was rejected. + The :file:`desc` and :file:`name` files both contain strings. The difference between them is that the name is expected to be more concise, while the description may be longer and it may contain white space or special characters. @@ -572,6 +576,11 @@ particular case. For these reasons, the only reliable way to find out how much time has been spent by the hardware in different idle states supported by it is to use idle state residency counters in the hardware, if available. +Generally, an interrupt received when trying to enter an idle state causes the +idle state entry request to be rejected, in which case the ``CPUIdle`` driver +may return an error code to indicate that this was the case. The :file:`usage` +and :file:`rejected` files report the number of times the given idle state +was entered successfully or rejected, respectively. .. _cpu-pm-qos: diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 6c7e5621cf9a8d..0ed5030b89d6d7 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -307,6 +307,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, } } else { dev->last_residency_ns = 0; + dev->states_usage[index].rejected++; } return entered_state; diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 091d1caceb4174..53ec9585ccd448 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -256,6 +256,7 @@ define_show_state_time_function(exit_latency) define_show_state_time_function(target_residency) define_show_state_function(power_usage) define_show_state_ull_function(usage) +define_show_state_ull_function(rejected) define_show_state_str_function(name) define_show_state_str_function(desc) define_show_state_ull_function(above) @@ -312,6 +313,7 @@ define_one_state_ro(latency, show_state_exit_latency); define_one_state_ro(residency, show_state_target_residency); define_one_state_ro(power, show_state_power_usage); define_one_state_ro(usage, show_state_usage); +define_one_state_ro(rejected, show_state_rejected); define_one_state_ro(time, show_state_time); define_one_state_rw(disable, show_state_disable, store_state_disable); define_one_state_ro(above, show_state_above); @@ -325,6 +327,7 @@ static struct attribute *cpuidle_state_default_attrs[] = { &attr_residency.attr, &attr_power.attr, &attr_usage.attr, + &attr_rejected.attr, &attr_time.attr, &attr_disable.attr, &attr_above.attr, diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 6175c77bf25e7a..ed0da0e58e8b00 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -38,6 +38,7 @@ struct cpuidle_state_usage { u64 time_ns; unsigned long long above; /* Number of times it's been too deep */ unsigned long long below; /* Number of times it's been too shallow */ + unsigned long long rejected; /* Number of times idle entry was rejected */ #ifdef CONFIG_SUSPEND unsigned long long s2idle_usage; unsigned long long s2idle_time; /* in US */ From 00610935158fcc812a6425b8bda3f87c8689b805 Mon Sep 17 00:00:00 2001 From: Pujin Shi Date: Tue, 22 Sep 2020 12:46:52 +0800 Subject: [PATCH 42/65] powercap: include header to fix -Wmissing-prototypes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include the linux/idle_inject.h header to fix W=1 build warning: drivers/powercap/idle_inject.c:152:6: warning: no previous prototype for ‘idle_inject_set_duration’ [-Wmissing-prototypes] drivers/powercap/idle_inject.c:167:6: warning: no previous prototype for ‘idle_inject_get_duration’ [-Wmissing-prototypes] drivers/powercap/idle_inject.c:179:6: warning: no previous prototype for ‘idle_inject_set_latency’ [-Wmissing-prototypes] drivers/powercap/idle_inject.c:195:5: warning: no previous prototype for ‘idle_inject_start’ [-Wmissing-prototypes] drivers/powercap/idle_inject.c:227:6: warning: no previous prototype for ‘idle_inject_stop’ [-Wmissing-prototypes] drivers/powercap/idle_inject.c:299:28: warning: no previous prototype for ‘idle_inject_register’ [-Wmissing-prototypes] drivers/powercap/idle_inject.c:345:6: warning: no previous prototype for ‘idle_inject_unregister’ [-Wmissing-prototypes] Signed-off-by: Pujin Shi Signed-off-by: Rafael J. Wysocki --- drivers/powercap/idle_inject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index 4310901a074ec5..6e1a0043c41104 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -43,6 +43,7 @@ #include #include #include +#include #include From d12544fb2aa9944b180c35914031a8384ab082c1 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Tue, 22 Sep 2020 21:11:06 +0800 Subject: [PATCH 43/65] PM: runtime: Remove link state checks in rpm_get/put_supplier() To support runtime PM for hisi SAS driver (the driver is in directory drivers/scsi/hisi_sas), we add device link between scsi_device->sdev_gendev (consumer device) and hisi_hba->dev(supplier device) with flags DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE. After runtime suspended consumers and supplier, unload the dirver which causes a hung. We found that it called function device_release_driver_internal() to release the supplier device (hisi_hba->dev), as the device link was busy, it set the device link state to DL_STATE_SUPPLIER_UNBIND, and then it called device_release_driver_internal() to release the consumer device (scsi_device->sdev_gendev). Then it would try to call pm_runtime_get_sync() to resume the consumer device, but because consumer-supplier relation existed, it would try to resume the supplier first, but as the link state was already DL_STATE_SUPPLIER_UNBIND, so it skipped resuming the supplier and only resumed the consumer which hanged (it sends IOs to resume scsi_device while the SAS controller is suspended). Simple flow is as follows: device_release_driver_internal -> (supplier device) if device_links_busy -> device_links_unbind_consumers -> ... WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND) device_release_driver_internal (consumer device) pm_runtime_get_sync -> (consumer device) ... __rpm_callback -> rpm_get_suppliers -> if link->state == DL_STATE_SUPPLIER_UNBIND -> skip the action of resuming the supplier ... pm_runtime_clean_up_links ... Correct suspend/resume ordering between a supplier device and its consumer devices (resume the supplier device before resuming consumer devices, and suspend consumer devices before suspending the supplier device) should be guaranteed by runtime PM, but the state checks in rpm_get_supplier() and rpm_put_supplier() break this rule, so remove them. Signed-off-by: Xiang Chen [ rjw: Subject and changelog edits ] Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 8143210a5c5477..6f605f7820bb5f 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -291,8 +291,7 @@ static int rpm_get_suppliers(struct device *dev) device_links_read_lock_held()) { int retval; - if (!(link->flags & DL_FLAG_PM_RUNTIME) || - READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND) + if (!(link->flags & DL_FLAG_PM_RUNTIME)) continue; retval = pm_runtime_get_sync(link->supplier); @@ -312,8 +311,6 @@ static void rpm_put_suppliers(struct device *dev) list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) { - if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND) - continue; while (refcount_dec_not_one(&link->rpm_active)) pm_runtime_put(link->supplier); From 55c4478a8f0ecedc0c1a0c9379380249985c372a Mon Sep 17 00:00:00 2001 From: Xiaoyi Chen Date: Tue, 22 Sep 2020 16:19:19 +0000 Subject: [PATCH 44/65] PM: hibernate: Batch hibernate and resume IO requests Hibernate and resume process submits individual IO requests for each page of the data, so use blk_plug to improve the batching of these requests. Testing this change with hibernate and resumes consistently shows merging of the IO requests and more than an order of magnitude improvement in hibernate and resume speed is observed. One hibernate and resume cycle for 16GB RAM out of 32GB in use takes around 21 minutes before the change, and 1 minutes after the change on a system with limited storage IOPS. Signed-off-by: Xiaoyi Chen Co-Developed-by: Anchal Agarwal Signed-off-by: Anchal Agarwal [ rjw: Subject and changelog edits, white space damage fixes ] Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 01e2858b5fe367..116320a0394dd4 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -226,6 +226,7 @@ struct hib_bio_batch { atomic_t count; wait_queue_head_t wait; blk_status_t error; + struct blk_plug plug; }; static void hib_init_batch(struct hib_bio_batch *hb) @@ -233,6 +234,12 @@ static void hib_init_batch(struct hib_bio_batch *hb) atomic_set(&hb->count, 0); init_waitqueue_head(&hb->wait); hb->error = BLK_STS_OK; + blk_start_plug(&hb->plug); +} + +static void hib_finish_batch(struct hib_bio_batch *hb) +{ + blk_finish_plug(&hb->plug); } static void hib_end_io(struct bio *bio) @@ -294,6 +301,10 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr, static blk_status_t hib_wait_io(struct hib_bio_batch *hb) { + /* + * We are relying on the behavior of blk_plug that a thread with + * a plug will flush the plug list before sleeping. + */ wait_event(hb->wait, atomic_read(&hb->count) == 0); return blk_status_to_errno(hb->error); } @@ -561,6 +572,7 @@ static int save_image(struct swap_map_handle *handle, nr_pages++; } err2 = hib_wait_io(&hb); + hib_finish_batch(&hb); stop = ktime_get(); if (!ret) ret = err2; @@ -854,6 +866,7 @@ static int save_image_lzo(struct swap_map_handle *handle, pr_info("Image saving done\n"); swsusp_show_speed(start, stop, nr_to_write, "Wrote"); out_clean: + hib_finish_batch(&hb); if (crc) { if (crc->thr) kthread_stop(crc->thr); @@ -1084,6 +1097,7 @@ static int load_image(struct swap_map_handle *handle, nr_pages++; } err2 = hib_wait_io(&hb); + hib_finish_batch(&hb); stop = ktime_get(); if (!ret) ret = err2; @@ -1447,6 +1461,7 @@ static int load_image_lzo(struct swap_map_handle *handle, } swsusp_show_speed(start, stop, nr_to_read, "Read"); out_clean: + hib_finish_batch(&hb); for (i = 0; i < ring_size; i++) free_page((unsigned long)page[i]); if (crc) { From 6b61d49a55796dbbc479eeb4465e59fd656c719c Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 18 Sep 2020 19:55:18 +0300 Subject: [PATCH 45/65] PM: runtime: Fix timer_expires data type on 32-bit arches Commit 8234f6734c5d ("PM-runtime: Switch autosuspend over to using hrtimers") switched PM runtime autosuspend to use hrtimers and all related time accounting in ns, but missed to update the timer_expires data type in struct dev_pm_info to u64. This causes the timer_expires value to be truncated on 32-bit architectures when assignment is done from u64 values: rpm_suspend() |- dev->power.timer_expires = expires; Fix it by changing the timer_expires type to u64. Fixes: 8234f6734c5d ("PM-runtime: Switch autosuspend over to using hrtimers") Signed-off-by: Grygorii Strashko Acked-by: Pavel Machek Acked-by: Vincent Guittot Cc: 5.0+ # 5.0+ [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pm.h b/include/linux/pm.h index a30a4b54df528b..47aca6bac1d6a2 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -590,7 +590,7 @@ struct dev_pm_info { #endif #ifdef CONFIG_PM struct hrtimer suspend_timer; - unsigned long timer_expires; + u64 timer_expires; struct work_struct work; wait_queue_head_t wait_queue; struct wake_irq *wakeirq; From 7b38b7b0427df70237e3c9c73f2db6b99be2b4b9 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 8 Sep 2020 19:24:45 +0900 Subject: [PATCH 46/65] PM / devfreq: Add devfreq_get_devfreq_by_node function Split off part of devfreq_get_devfreq_by_phandle into a separate function. This allows callers to fetch devfreq instances by enumerating devicetree instead of explicit phandles. Acked-by: Krzysztof Kozlowski Reviewed-by: Lukasz Luba Signed-off-by: Leonard Crestez [cw00.choi: Export devfreq_get_devfreq_by_node function and add function to devfreq.h when CONFIG_PM_DEVFREQ is enabled.] Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 46 +++++++++++++++++++++++++++++---------- include/linux/devfreq.h | 6 +++++ 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 071b59fe84d2ea..d4424b5d830606 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -983,6 +983,32 @@ struct devfreq *devm_devfreq_add_device(struct device *dev, EXPORT_SYMBOL(devm_devfreq_add_device); #ifdef CONFIG_OF +/* + * devfreq_get_devfreq_by_node - Get the devfreq device from devicetree + * @node - pointer to device_node + * + * return the instance of devfreq device + */ +struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node) +{ + struct devfreq *devfreq; + + if (!node) + return ERR_PTR(-EINVAL); + + mutex_lock(&devfreq_list_lock); + list_for_each_entry(devfreq, &devfreq_list, node) { + if (devfreq->dev.parent + && devfreq->dev.parent->of_node == node) { + mutex_unlock(&devfreq_list_lock); + return devfreq; + } + } + mutex_unlock(&devfreq_list_lock); + + return ERR_PTR(-ENODEV); +} + /* * devfreq_get_devfreq_by_phandle - Get the devfreq device from devicetree * @dev - instance to the given device @@ -1005,26 +1031,24 @@ struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index) if (!node) return ERR_PTR(-ENODEV); - mutex_lock(&devfreq_list_lock); - list_for_each_entry(devfreq, &devfreq_list, node) { - if (devfreq->dev.parent - && devfreq->dev.parent->of_node == node) { - mutex_unlock(&devfreq_list_lock); - of_node_put(node); - return devfreq; - } - } - mutex_unlock(&devfreq_list_lock); + devfreq = devfreq_get_devfreq_by_node(node); of_node_put(node); - return ERR_PTR(-EPROBE_DEFER); + return devfreq; } + #else +struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node) +{ + return ERR_PTR(-ENODEV); +} + struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index) { return ERR_PTR(-ENODEV); } #endif /* CONFIG_OF */ +EXPORT_SYMBOL_GPL(devfreq_get_devfreq_by_node); EXPORT_SYMBOL_GPL(devfreq_get_devfreq_by_phandle); /** diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 12782fbb4c25eb..eb971b8e5051b0 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -261,6 +261,7 @@ void devm_devfreq_unregister_notifier(struct device *dev, struct devfreq *devfreq, struct notifier_block *nb, unsigned int list); +struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node); struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index); #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) @@ -414,6 +415,11 @@ static inline void devm_devfreq_unregister_notifier(struct device *dev, { } +static inline struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node) +{ + return ERR_PTR(-ENODEV); +} + static inline struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index) { From 86d90fd95bbc3b3fdc2ef0507b7324cd1d0a358b Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 8 Sep 2020 19:24:46 +0900 Subject: [PATCH 47/65] PM / devfreq: Change prototype of devfreq_get_devfreq_by_phandle function Previously, devfreq core support 'devfreq' property in order to get the devfreq device by phandle. But, 'devfreq' property name is not proper on devicetree binding because this name doesn't mean the any h/w attribute. The devfreq core hand over the right to decide the property name for getting the devfreq device on devicetree. Each devfreq driver will decide the property name on devicetree binding and pass the their own property name to devfreq_get_devfreq_by_phandle function. Acked-by: Krzysztof Kozlowski Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 11 +++++++---- drivers/devfreq/exynos-bus.c | 2 +- include/linux/devfreq.h | 5 +++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index d4424b5d830606..861c100f9fac34 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1012,22 +1012,24 @@ struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node) /* * devfreq_get_devfreq_by_phandle - Get the devfreq device from devicetree * @dev - instance to the given device + * @phandle_name - name of property holding a phandle value * @index - index into list of devfreq * * return the instance of devfreq device */ -struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index) +struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, + const char *phandle_name, int index) { struct device_node *node; struct devfreq *devfreq; - if (!dev) + if (!dev || !phandle_name) return ERR_PTR(-EINVAL); if (!dev->of_node) return ERR_PTR(-EINVAL); - node = of_parse_phandle(dev->of_node, "devfreq", index); + node = of_parse_phandle(dev->of_node, phandle_name, index); if (!node) return ERR_PTR(-ENODEV); @@ -1043,7 +1045,8 @@ struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node) return ERR_PTR(-ENODEV); } -struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index) +struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, + const char *phandle_name, int index) { return ERR_PTR(-ENODEV); } diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index 8fa8eb5413732f..58dbf51f0983d5 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -360,7 +360,7 @@ static int exynos_bus_profile_init_passive(struct exynos_bus *bus, profile->exit = exynos_bus_passive_exit; /* Get the instance of parent devfreq device */ - parent_devfreq = devfreq_get_devfreq_by_phandle(dev, 0); + parent_devfreq = devfreq_get_devfreq_by_phandle(dev, "devfreq", 0); if (IS_ERR(parent_devfreq)) return -EPROBE_DEFER; diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index eb971b8e5051b0..2f4a74efa6bed3 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -262,7 +262,8 @@ void devm_devfreq_unregister_notifier(struct device *dev, struct notifier_block *nb, unsigned int list); struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node); -struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index); +struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, + const char *phandle_name, int index); #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) /** @@ -421,7 +422,7 @@ static inline struct devfreq *devfreq_get_devfreq_by_node(struct device_node *no } static inline struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, - int index) + const char *phandle_name, int index) { return ERR_PTR(-ENODEV); } From 02bdbf7d09c059b16047e3d4a05e6d584dd993b6 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 8 Sep 2020 19:24:47 +0900 Subject: [PATCH 48/65] PM / devfreq: event: Change prototype of devfreq_event_get_edev_by_phandle function Previously, devfreq core support 'devfreq-events' property in order to get the devfreq-event device by phandle. But, 'devfreq-events' property name is not proper on devicetree binding because this name doesn't mean the any h/w attribute. The devfreq-event core hand over the rights to decide the property name for getting the devfreq-event device on devicetree. Each devfreq-event driver will decide the property name on devicetree binding and then pass the their own property name to devfreq_event_get_edev_by_phandle function. And change the prototype of devfreq_event_get_edev_count function because of used deprecated 'devfreq-events' property. Acked-by: Krzysztof Kozlowski Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq-event.c | 14 ++++++++------ drivers/devfreq/exynos-bus.c | 5 +++-- drivers/devfreq/rk3399_dmc.c | 2 +- drivers/memory/samsung/exynos5422-dmc.c | 6 ++++-- include/linux/devfreq-event.h | 14 ++++++++++---- 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/devfreq/devfreq-event.c b/drivers/devfreq/devfreq-event.c index 56efbeb7851e7a..6765c03334bc89 100644 --- a/drivers/devfreq/devfreq-event.c +++ b/drivers/devfreq/devfreq-event.c @@ -213,20 +213,21 @@ EXPORT_SYMBOL_GPL(devfreq_event_reset_event); * devfreq_event_get_edev_by_phandle() - Get the devfreq-event dev from * devicetree. * @dev : the pointer to the given device + * @phandle_name: name of property holding a phandle value * @index : the index into list of devfreq-event device * * Note that this function return the pointer of devfreq-event device. */ struct devfreq_event_dev *devfreq_event_get_edev_by_phandle(struct device *dev, - int index) + const char *phandle_name, int index) { struct device_node *node; struct devfreq_event_dev *edev; - if (!dev->of_node) + if (!dev->of_node || !phandle_name) return ERR_PTR(-EINVAL); - node = of_parse_phandle(dev->of_node, "devfreq-events", index); + node = of_parse_phandle(dev->of_node, phandle_name, index); if (!node) return ERR_PTR(-ENODEV); @@ -258,19 +259,20 @@ EXPORT_SYMBOL_GPL(devfreq_event_get_edev_by_phandle); /** * devfreq_event_get_edev_count() - Get the count of devfreq-event dev * @dev : the pointer to the given device + * @phandle_name: name of property holding a phandle value * * Note that this function return the count of devfreq-event devices. */ -int devfreq_event_get_edev_count(struct device *dev) +int devfreq_event_get_edev_count(struct device *dev, const char *phandle_name) { int count; - if (!dev->of_node) { + if (!dev->of_node || !phandle_name) { dev_err(dev, "device does not have a device node entry\n"); return -EINVAL; } - count = of_property_count_elems_of_size(dev->of_node, "devfreq-events", + count = of_property_count_elems_of_size(dev->of_node, phandle_name, sizeof(u32)); if (count < 0) { dev_err(dev, diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index 58dbf51f0983d5..1e684a448c9efe 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -193,7 +193,7 @@ static int exynos_bus_parent_parse_of(struct device_node *np, * Get the devfreq-event devices to get the current utilization of * buses. This raw data will be used in devfreq ondemand governor. */ - count = devfreq_event_get_edev_count(dev); + count = devfreq_event_get_edev_count(dev, "devfreq-events"); if (count < 0) { dev_err(dev, "failed to get the count of devfreq-event dev\n"); ret = count; @@ -209,7 +209,8 @@ static int exynos_bus_parent_parse_of(struct device_node *np, } for (i = 0; i < count; i++) { - bus->edev[i] = devfreq_event_get_edev_by_phandle(dev, i); + bus->edev[i] = devfreq_event_get_edev_by_phandle(dev, + "devfreq-events", i); if (IS_ERR(bus->edev[i])) { ret = -EPROBE_DEFER; goto err_regulator; diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index 027769e39f9b86..2e912166a9934f 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -341,7 +341,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) return PTR_ERR(data->dmc_clk); } - data->edev = devfreq_event_get_edev_by_phandle(dev, 0); + data->edev = devfreq_event_get_edev_by_phandle(dev, "devfreq-events", 0); if (IS_ERR(data->edev)) return -EPROBE_DEFER; diff --git a/drivers/memory/samsung/exynos5422-dmc.c b/drivers/memory/samsung/exynos5422-dmc.c index b9c7956e503147..714d1f6f077c23 100644 --- a/drivers/memory/samsung/exynos5422-dmc.c +++ b/drivers/memory/samsung/exynos5422-dmc.c @@ -1293,7 +1293,8 @@ static int exynos5_performance_counters_init(struct exynos5_dmc *dmc) int counters_size; int ret, i; - dmc->num_counters = devfreq_event_get_edev_count(dmc->dev); + dmc->num_counters = devfreq_event_get_edev_count(dmc->dev, + "devfreq-events"); if (dmc->num_counters < 0) { dev_err(dmc->dev, "could not get devfreq-event counters\n"); return dmc->num_counters; @@ -1306,7 +1307,8 @@ static int exynos5_performance_counters_init(struct exynos5_dmc *dmc) for (i = 0; i < dmc->num_counters; i++) { dmc->counter[i] = - devfreq_event_get_edev_by_phandle(dmc->dev, i); + devfreq_event_get_edev_by_phandle(dmc->dev, + "devfreq-events", i); if (IS_ERR_OR_NULL(dmc->counter[i])) return -EPROBE_DEFER; } diff --git a/include/linux/devfreq-event.h b/include/linux/devfreq-event.h index f14f17f8cb7f5e..4a50a5c71a5ff0 100644 --- a/include/linux/devfreq-event.h +++ b/include/linux/devfreq-event.h @@ -106,8 +106,11 @@ extern int devfreq_event_get_event(struct devfreq_event_dev *edev, struct devfreq_event_data *edata); extern int devfreq_event_reset_event(struct devfreq_event_dev *edev); extern struct devfreq_event_dev *devfreq_event_get_edev_by_phandle( - struct device *dev, int index); -extern int devfreq_event_get_edev_count(struct device *dev); + struct device *dev, + const char *phandle_name, + int index); +extern int devfreq_event_get_edev_count(struct device *dev, + const char *phandle_name); extern struct devfreq_event_dev *devfreq_event_add_edev(struct device *dev, struct devfreq_event_desc *desc); extern int devfreq_event_remove_edev(struct devfreq_event_dev *edev); @@ -152,12 +155,15 @@ static inline int devfreq_event_reset_event(struct devfreq_event_dev *edev) } static inline struct devfreq_event_dev *devfreq_event_get_edev_by_phandle( - struct device *dev, int index) + struct device *dev, + const char *phandle_name, + int index) { return ERR_PTR(-EINVAL); } -static inline int devfreq_event_get_edev_count(struct device *dev) +static inline int devfreq_event_get_edev_count(struct device *dev, + const char *phandle_name) { return -EINVAL; } From d353d1202b89ab039acd079cd97f7646058ebe11 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 27 Sep 2020 23:51:39 +0300 Subject: [PATCH 49/65] PM / devfreq: tegra30: Improve initial hardware resetting It's safe to enable the ACTMON clock at any time during driver probing, even if we don't know the state of hardware, because it's used only for collecting and processing stats, and interrupt is kept disabled. This allows us to slightly improve code which performs initial hardware resetting by making use of a single reset_control_reset() instead of assert/deassert pair. Secondly, a potential error of the reset-control API is handled nicely now. Signed-off-by: Dmitry Osipenko Signed-off-by: Chanwoo Choi --- drivers/devfreq/tegra30-devfreq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index dedd39de736752..f5e74c2ede85dd 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -822,8 +822,6 @@ static int tegra_devfreq_probe(struct platform_device *pdev) return err; } - reset_control_assert(tegra->reset); - err = clk_prepare_enable(tegra->clock); if (err) { dev_err(&pdev->dev, @@ -831,7 +829,11 @@ static int tegra_devfreq_probe(struct platform_device *pdev) return err; } - reset_control_deassert(tegra->reset); + err = reset_control_reset(tegra->reset); + if (err) { + dev_err(&pdev->dev, "Failed to reset hardware: %d\n", err); + goto disable_clk; + } rate = clk_round_rate(tegra->emc_clock, ULONG_MAX); if (rate < 0) { From 49f618e1b669ef0e26a8d8d7f8fafc7b8fd31531 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 24 Sep 2020 13:04:47 +0200 Subject: [PATCH 50/65] PM: domains: Rename power state enums for genpd To clarify the code a bit, let's rename GPD_STATE_ACTIVE into GENPD_STATE_ON and GPD_STATE_POWER_OFF to GENPD_STATE_OFF. Signed-off-by: Ulf Hansson [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 33 ++++++++++++++++----------------- include/linux/pm_domain.h | 4 ++-- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 2cb5e04cf86cd3..23aa2feced777b 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -123,7 +123,7 @@ static const struct genpd_lock_ops genpd_spin_ops = { #define genpd_lock_interruptible(p) p->lock_ops->lock_interruptible(p) #define genpd_unlock(p) p->lock_ops->unlock(p) -#define genpd_status_on(genpd) (genpd->status == GPD_STATE_ACTIVE) +#define genpd_status_on(genpd) (genpd->status == GENPD_STATE_ON) #define genpd_is_irq_safe(genpd) (genpd->flags & GENPD_FLAG_IRQ_SAFE) #define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON) #define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP) @@ -222,7 +222,7 @@ static void genpd_update_accounting(struct generic_pm_domain *genpd) * out of off and so update the idle time and vice * versa. */ - if (genpd->status == GPD_STATE_ACTIVE) { + if (genpd->status == GENPD_STATE_ON) { int state_idx = genpd->state_idx; genpd->states[state_idx].idle_time = @@ -563,7 +563,7 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, return ret; } - genpd->status = GPD_STATE_POWER_OFF; + genpd->status = GENPD_STATE_OFF; genpd_update_accounting(genpd); list_for_each_entry(link, &genpd->child_links, child_node) { @@ -616,7 +616,7 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth) if (ret) goto err; - genpd->status = GPD_STATE_ACTIVE; + genpd->status = GENPD_STATE_ON; genpd_update_accounting(genpd); return 0; @@ -961,7 +961,7 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock, if (_genpd_power_off(genpd, false)) return; - genpd->status = GPD_STATE_POWER_OFF; + genpd->status = GENPD_STATE_OFF; list_for_each_entry(link, &genpd->child_links, child_node) { genpd_sd_counter_dec(link->parent); @@ -1007,8 +1007,7 @@ static void genpd_sync_power_on(struct generic_pm_domain *genpd, bool use_lock, } _genpd_power_on(genpd, false); - - genpd->status = GPD_STATE_ACTIVE; + genpd->status = GENPD_STATE_ON; } /** @@ -1287,7 +1286,7 @@ static int genpd_restore_noirq(struct device *dev) * so make it appear as powered off to genpd_sync_power_on(), * so that it tries to power it on in case it was really off. */ - genpd->status = GPD_STATE_POWER_OFF; + genpd->status = GENPD_STATE_OFF; genpd_sync_power_on(genpd, true, 0); genpd_unlock(genpd); @@ -1777,7 +1776,7 @@ int pm_genpd_init(struct generic_pm_domain *genpd, genpd->gov = gov; INIT_WORK(&genpd->power_off_work, genpd_power_off_work_fn); atomic_set(&genpd->sd_count, 0); - genpd->status = is_off ? GPD_STATE_POWER_OFF : GPD_STATE_ACTIVE; + genpd->status = is_off ? GENPD_STATE_OFF : GENPD_STATE_ON; genpd->device_count = 0; genpd->max_off_time_ns = -1; genpd->max_off_time_changed = true; @@ -2802,8 +2801,8 @@ static int genpd_summary_one(struct seq_file *s, struct generic_pm_domain *genpd) { static const char * const status_lookup[] = { - [GPD_STATE_ACTIVE] = "on", - [GPD_STATE_POWER_OFF] = "off" + [GENPD_STATE_ON] = "on", + [GENPD_STATE_OFF] = "off" }; struct pm_domain_data *pm_data; const char *kobj_path; @@ -2881,8 +2880,8 @@ static int summary_show(struct seq_file *s, void *data) static int status_show(struct seq_file *s, void *data) { static const char * const status_lookup[] = { - [GPD_STATE_ACTIVE] = "on", - [GPD_STATE_POWER_OFF] = "off" + [GENPD_STATE_ON] = "on", + [GENPD_STATE_OFF] = "off" }; struct generic_pm_domain *genpd = s->private; @@ -2895,7 +2894,7 @@ static int status_show(struct seq_file *s, void *data) if (WARN_ON_ONCE(genpd->status >= ARRAY_SIZE(status_lookup))) goto exit; - if (genpd->status == GPD_STATE_POWER_OFF) + if (genpd->status == GENPD_STATE_OFF) seq_printf(s, "%s-%u\n", status_lookup[genpd->status], genpd->state_idx); else @@ -2938,7 +2937,7 @@ static int idle_states_show(struct seq_file *s, void *data) ktime_t delta = 0; s64 msecs; - if ((genpd->status == GPD_STATE_POWER_OFF) && + if ((genpd->status == GENPD_STATE_OFF) && (genpd->state_idx == i)) delta = ktime_sub(ktime_get(), genpd->accounting_time); @@ -2961,7 +2960,7 @@ static int active_time_show(struct seq_file *s, void *data) if (ret) return -ERESTARTSYS; - if (genpd->status == GPD_STATE_ACTIVE) + if (genpd->status == GENPD_STATE_ON) delta = ktime_sub(ktime_get(), genpd->accounting_time); seq_printf(s, "%lld ms\n", ktime_to_ms( @@ -2984,7 +2983,7 @@ static int total_idle_time_show(struct seq_file *s, void *data) for (i = 0; i < genpd->state_count; i++) { - if ((genpd->status == GPD_STATE_POWER_OFF) && + if ((genpd->status == GENPD_STATE_OFF) && (genpd->state_idx == i)) delta = ktime_sub(ktime_get(), genpd->accounting_time); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index ee11502a575b0e..66f3c5d64d818d 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -64,8 +64,8 @@ #define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5) enum gpd_status { - GPD_STATE_ACTIVE = 0, /* PM domain is active */ - GPD_STATE_POWER_OFF, /* PM domain is off */ + GENPD_STATE_ON = 0, /* PM domain is on */ + GENPD_STATE_OFF, /* PM domain is off */ }; struct dev_power_governor { From f63816e43d90442684cd2fd74f602cf3c5fae69c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 24 Sep 2020 13:04:48 +0200 Subject: [PATCH 51/65] PM: domains: Allow to abort power off when no ->power_off() callback In genpd_power_off() we may decide to abort the power off of the PM domain, even beyond the point when the governor would accept it. The abort is done if it turns out that a child domain has been requested to be powered on, which means it's waiting for the lock of the parent to be released. However, the abort is currently only considered if the genpd in question has a ->power_off() callback assigned. This is unnecessary limiting, especially if the genpd would have a parent of its own. Let's remove the limitation and make the behaviour consistent. Signed-off-by: Ulf Hansson [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 23aa2feced777b..0198af35850386 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -497,6 +497,7 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, struct pm_domain_data *pdd; struct gpd_link *link; unsigned int not_suspended = 0; + int ret; /* * Do not try to power off the domain in the following situations: @@ -544,24 +545,13 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, if (!genpd->gov) genpd->state_idx = 0; - if (genpd->power_off) { - int ret; - - if (atomic_read(&genpd->sd_count) > 0) - return -EBUSY; + /* Don't power off, if a child domain is waiting to power on. */ + if (atomic_read(&genpd->sd_count) > 0) + return -EBUSY; - /* - * If sd_count > 0 at this point, one of the subdomains hasn't - * managed to call genpd_power_on() for the parent yet after - * incrementing it. In that case genpd_power_on() will wait - * for us to drop the lock, so we can call .power_off() and let - * the genpd_power_on() restore power for us (this shouldn't - * happen very often). - */ - ret = _genpd_power_off(genpd, true); - if (ret) - return ret; - } + ret = _genpd_power_off(genpd, true); + if (ret) + return ret; genpd->status = GENPD_STATE_OFF; genpd_update_accounting(genpd); From 40c3bd4cfa6fe5756344709b98611a1ddd15bf9d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 5 Oct 2020 13:26:01 +0530 Subject: [PATCH 52/65] cpufreq: stats: Defer stats update to cpufreq_stats_record_transition() In order to prepare for lock-less stats update, add support to defer any updates to it until cpufreq_stats_record_transition() is called. The stats were updated from two places earlier: - show_time_in_state(): This can be easily deferred, all we need is to calculate the delta duration again in this routine to show the current state's time-in-state. - store_reset(): This is a bit tricky as we need to clear the stats here and avoid races with simultaneous call to cpufreq_stats_record_transition(). Fix that by deferring the reset of the stats (within the code) to the next call to cpufreq_stats_record_transition(), but since we need to keep showing the right stats until that time, we capture the reset time and account for the time since last time reset was called until the time cpufreq_stats_record_transition() update the stats. User space will continue seeing the stats correctly, everything will be 0 after the stats are reset, apart from the time-in-state of the current state, until the time a frequency switch happens. Signed-off-by: Viresh Kumar [ rjw: Minor changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 75 ++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 94d959a8e954e9..498d962ba22481 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -22,17 +22,22 @@ struct cpufreq_stats { spinlock_t lock; unsigned int *freq_table; unsigned int *trans_table; + + /* Deferred reset */ + unsigned int reset_pending; + unsigned long long reset_time; }; -static void cpufreq_stats_update(struct cpufreq_stats *stats) +static void cpufreq_stats_update(struct cpufreq_stats *stats, + unsigned long long time) { unsigned long long cur_time = get_jiffies_64(); - stats->time_in_state[stats->last_index] += cur_time - stats->last_time; + stats->time_in_state[stats->last_index] += cur_time - time; stats->last_time = cur_time; } -static void cpufreq_stats_clear_table(struct cpufreq_stats *stats) +static void cpufreq_stats_reset_table(struct cpufreq_stats *stats) { unsigned int count = stats->max_state; @@ -41,42 +46,67 @@ static void cpufreq_stats_clear_table(struct cpufreq_stats *stats) memset(stats->trans_table, 0, count * count * sizeof(int)); stats->last_time = get_jiffies_64(); stats->total_trans = 0; + + /* Adjust for the time elapsed since reset was requested */ + WRITE_ONCE(stats->reset_pending, 0); + cpufreq_stats_update(stats, READ_ONCE(stats->reset_time)); spin_unlock(&stats->lock); } static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf) { - return sprintf(buf, "%d\n", policy->stats->total_trans); + struct cpufreq_stats *stats = policy->stats; + + if (READ_ONCE(stats->reset_pending)) + return sprintf(buf, "%d\n", 0); + else + return sprintf(buf, "%d\n", stats->total_trans); } cpufreq_freq_attr_ro(total_trans); static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) { struct cpufreq_stats *stats = policy->stats; + bool pending = READ_ONCE(stats->reset_pending); + unsigned long long time; ssize_t len = 0; int i; if (policy->fast_switch_enabled) return 0; - spin_lock(&stats->lock); - cpufreq_stats_update(stats); - spin_unlock(&stats->lock); - for (i = 0; i < stats->state_num; i++) { + if (pending) { + if (i == stats->last_index) + time = get_jiffies_64() - READ_ONCE(stats->reset_time); + else + time = 0; + } else { + time = stats->time_in_state[i]; + if (i == stats->last_index) + time += get_jiffies_64() - stats->last_time; + } + len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i], - (unsigned long long) - jiffies_64_to_clock_t(stats->time_in_state[i])); + jiffies_64_to_clock_t(time)); } return len; } cpufreq_freq_attr_ro(time_in_state); +/* We don't care what is written to the attribute */ static ssize_t store_reset(struct cpufreq_policy *policy, const char *buf, size_t count) { - /* We don't care what is written to the attribute. */ - cpufreq_stats_clear_table(policy->stats); + struct cpufreq_stats *stats = policy->stats; + + /* + * Defer resetting of stats to cpufreq_stats_record_transition() to + * avoid races. + */ + WRITE_ONCE(stats->reset_time, get_jiffies_64()); + WRITE_ONCE(stats->reset_pending, 1); + return count; } cpufreq_freq_attr_wo(reset); @@ -84,8 +114,9 @@ cpufreq_freq_attr_wo(reset); static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) { struct cpufreq_stats *stats = policy->stats; + bool pending = READ_ONCE(stats->reset_pending); ssize_t len = 0; - int i, j; + int i, j, count; if (policy->fast_switch_enabled) return 0; @@ -113,8 +144,13 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) for (j = 0; j < stats->state_num; j++) { if (len >= PAGE_SIZE) break; - len += scnprintf(buf + len, PAGE_SIZE - len, "%9u ", - stats->trans_table[i*stats->max_state+j]); + + if (pending) + count = 0; + else + count = stats->trans_table[i * stats->max_state + j]; + + len += scnprintf(buf + len, PAGE_SIZE - len, "%9u ", count); } if (len >= PAGE_SIZE) break; @@ -228,10 +264,11 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy, struct cpufreq_stats *stats = policy->stats; int old_index, new_index; - if (!stats) { - pr_debug("%s: No stats found\n", __func__); + if (!stats) return; - } + + if (unlikely(READ_ONCE(stats->reset_pending))) + cpufreq_stats_reset_table(stats); old_index = stats->last_index; new_index = freq_table_get_index(stats, new_freq); @@ -241,7 +278,7 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy, return; spin_lock(&stats->lock); - cpufreq_stats_update(stats); + cpufreq_stats_update(stats, stats->last_time); stats->last_index = new_index; stats->trans_table[old_index * stats->max_state + new_index]++; From 381abb942a392377f614b62c87692f4f2e8aa59b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 5 Oct 2020 13:26:02 +0530 Subject: [PATCH 53/65] cpufreq: stats: Remove locking The locking isn't required anymore as stats can get updated only from one place at a time. Remove it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 498d962ba22481..45a06785536771 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -19,7 +19,6 @@ struct cpufreq_stats { unsigned int state_num; unsigned int last_index; u64 *time_in_state; - spinlock_t lock; unsigned int *freq_table; unsigned int *trans_table; @@ -41,7 +40,6 @@ static void cpufreq_stats_reset_table(struct cpufreq_stats *stats) { unsigned int count = stats->max_state; - spin_lock(&stats->lock); memset(stats->time_in_state, 0, count * sizeof(u64)); memset(stats->trans_table, 0, count * count * sizeof(int)); stats->last_time = get_jiffies_64(); @@ -50,7 +48,6 @@ static void cpufreq_stats_reset_table(struct cpufreq_stats *stats) /* Adjust for the time elapsed since reset was requested */ WRITE_ONCE(stats->reset_pending, 0); cpufreq_stats_update(stats, READ_ONCE(stats->reset_time)); - spin_unlock(&stats->lock); } static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf) @@ -244,7 +241,6 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy) stats->state_num = i; stats->last_time = get_jiffies_64(); stats->last_index = freq_table_get_index(stats, policy->cur); - spin_lock_init(&stats->lock); policy->stats = stats; ret = sysfs_create_group(&policy->kobj, &stats_attr_group); @@ -277,11 +273,9 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy, if (old_index == -1 || new_index == -1 || old_index == new_index) return; - spin_lock(&stats->lock); cpufreq_stats_update(stats, stats->last_time); stats->last_index = new_index; stats->trans_table[old_index * stats->max_state + new_index]++; stats->total_trans++; - spin_unlock(&stats->lock); } From 4958b46efb6df20fa3abb3a4d007515485f6af16 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 5 Oct 2020 13:26:03 +0530 Subject: [PATCH 54/65] cpufreq: stats: Mark few conditionals with unlikely() Since this will be part of the scheduler's hotpath in some cases, use unlikely() for few of the obvious conditionals. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 45a06785536771..bba04da3a27801 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -260,7 +260,7 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy, struct cpufreq_stats *stats = policy->stats; int old_index, new_index; - if (!stats) + if (unlikely(!stats)) return; if (unlikely(READ_ONCE(stats->reset_pending))) @@ -270,7 +270,7 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy, new_index = freq_table_get_index(stats, new_freq); /* We can't do stats->time_in_state[-1]= .. */ - if (old_index == -1 || new_index == -1 || old_index == new_index) + if (unlikely(old_index == -1 || new_index == -1 || old_index == new_index)) return; cpufreq_stats_update(stats, stats->last_time); From 96f60cddf7a1c5374296dd14355b519ee3245b3e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 5 Oct 2020 13:26:04 +0530 Subject: [PATCH 55/65] cpufreq: stats: Enable stats for fast-switch as well Now that all the blockers are gone for enabling stats in fast-switching case, enable it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 4 ++++ drivers/cpufreq/cpufreq_stats.c | 6 ------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 2ea245a6c0c074..2d0e2e464b1436 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2072,8 +2072,12 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, target_freq = clamp_val(target_freq, policy->min, policy->max); freq = cpufreq_driver->fast_switch(policy, target_freq); + if (!freq) + return 0; + arch_set_freq_scale(policy->related_cpus, freq, policy->cpuinfo.max_freq); + cpufreq_stats_record_transition(policy, freq); return freq; } diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index bba04da3a27801..8e7d64f3404125 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -69,9 +69,6 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) ssize_t len = 0; int i; - if (policy->fast_switch_enabled) - return 0; - for (i = 0; i < stats->state_num; i++) { if (pending) { if (i == stats->last_index) @@ -115,9 +112,6 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) ssize_t len = 0; int i, j, count; - if (policy->fast_switch_enabled) - return 0; - len += scnprintf(buf + len, PAGE_SIZE - len, " From : To\n"); len += scnprintf(buf + len, PAGE_SIZE - len, " : "); for (i = 0; i < stats->state_num; i++) { From 08d8c65e849d7579bafe2b03eab844d7860e3682 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 5 Oct 2020 13:26:05 +0530 Subject: [PATCH 56/65] cpufreq: Move traces and update to policy->cur to cpufreq core The cpufreq core handles the updates to policy->cur and recording of cpufreq trace events for all the governors except schedutil's fast switch case. Move that as well to cpufreq core for consistency and readability. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 7 +++++++ kernel/sched/cpufreq_schedutil.c | 12 +----------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 2d0e2e464b1436..db00693a586a7a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2068,6 +2068,7 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { unsigned int freq; + int cpu; target_freq = clamp_val(target_freq, policy->min, policy->max); freq = cpufreq_driver->fast_switch(policy, target_freq); @@ -2075,10 +2076,16 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, if (!freq) return 0; + policy->cur = freq; arch_set_freq_scale(policy->related_cpus, freq, policy->cpuinfo.max_freq); cpufreq_stats_record_transition(policy, freq); + if (trace_cpu_frequency_enabled()) { + for_each_cpu(cpu, policy->cpus) + trace_cpu_frequency(freq, cpu); + } + return freq; } EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index e39008242cf4d6..28f6d1ad608b77 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -115,21 +115,11 @@ static void sugov_fast_switch(struct sugov_policy *sg_policy, u64 time, unsigned int next_freq) { struct cpufreq_policy *policy = sg_policy->policy; - int cpu; if (!sugov_update_next_freq(sg_policy, time, next_freq)) return; - next_freq = cpufreq_driver_fast_switch(policy, next_freq); - if (!next_freq) - return; - - policy->cur = next_freq; - - if (trace_cpu_frequency_enabled()) { - for_each_cpu(cpu, policy->cpus) - trace_cpu_frequency(next_freq, cpu); - } + cpufreq_driver_fast_switch(policy, next_freq); } static void sugov_deferred_update(struct sugov_policy *sg_policy, u64 time, From 428805c0c5e76ef643b1fbc893edfb636b3d8aef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 18:14:47 +0200 Subject: [PATCH 57/65] PM: hibernate: remove the bogus call to get_gendisk() in software_resume() get_gendisk grabs a reference on the disk and file operation, so this code will leak both of them while having absolutely no use for the gendisk itself. This effectively reverts commit 2df83fa4bce421f ("PM / Hibernate: Use get_gendisk to verify partition if resume_file is integer format") Signed-off-by: Christoph Hellwig Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index e7aa57fb2fdc33..7d0b99d2e69631 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -948,17 +948,6 @@ static int software_resume(void) /* Check if the device is there */ swsusp_resume_device = name_to_dev_t(resume_file); - - /* - * name_to_dev_t is ineffective to verify parition if resume_file is in - * integer format. (e.g. major:minor) - */ - if (isdigit(resume_file[0]) && resume_wait) { - int partno; - while (!get_gendisk(swsusp_resume_device, &partno)) - msleep(10); - } - if (!swsusp_resume_device) { /* * Some device discovery might still be in progress; we need From c6e331312ebfb52b7186e5d82d517d68b4d2f2d8 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 2 Oct 2020 07:10:12 +0200 Subject: [PATCH 58/65] PCI/ACPI: Whitelist hotplug ports for D3 if power managed by ACPI Recent laptops with dual AMD GPUs fail to suspend the discrete GPU, thus causing lockups on system sleep and high power consumption at runtime. The discrete GPU would normally be suspended to D3cold by turning off ACPI _PR3 Power Resources of the Root Port above the GPU. However on affected systems, the Root Port is hotplug-capable and pci_bridge_d3_possible() only allows hotplug ports to go to D3 if they belong to a Thunderbolt device or if the Root Port possesses a "HotPlugSupportInD3" ACPI property. Neither is the case on affected laptops. The reason for whitelisting only specific, known to work hotplug ports for D3 is that there have been reports of SkyLake Xeon-SP systems raising Hardware Error NMIs upon suspending their hotplug ports: https://lore.kernel.org/linux-pci/20170503180426.GA4058@otc-nc-03/ But if a hotplug port is power manageable by ACPI (as can be detected through presence of Power Resources and corresponding _PS0 and _PS3 methods) then it ought to be safe to suspend it to D3. To this end, amend acpi_pci_bridge_d3() to whitelist such ports for D3. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1222 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1252 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1304 Reported-and-tested-by: Arthur Borsboom Reported-and-tested-by: matoro Reported-by: Aaron Zakhrov Reported-by: Michal Rostecki Reported-by: Shai Coleman Signed-off-by: Lukas Wunner Acked-by: Alex Deucher Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci-acpi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index d5869a03f74836..d9aa551f842363 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -944,6 +944,16 @@ static bool acpi_pci_bridge_d3(struct pci_dev *dev) if (!dev->is_hotplug_bridge) return false; + /* Assume D3 support if the bridge is power-manageable by ACPI. */ + adev = ACPI_COMPANION(&dev->dev); + if (!adev && !pci_dev_is_added(dev)) { + adev = acpi_pci_find_companion(&dev->dev); + ACPI_COMPANION_SET(&dev->dev, adev); + } + + if (adev && acpi_device_power_manageable(adev)) + return true; + /* * Look for a special _DSD property for the root port and if it * is set we know the hierarchy behind it supports D3 just fine. From 5e92442bb4121562231e6daf8a2d1306cb5f8805 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 5 Oct 2020 19:13:15 +0200 Subject: [PATCH 59/65] ACPI: EC: PM: Flush EC work unconditionally after wakeup Commit 607b9df63057 ("ACPI: EC: PM: Avoid flushing EC work when EC GPE is inactive") has been reported to cause some power button wakeup events to be missed on some systems, so modify acpi_ec_dispatch_gpe() to call acpi_ec_flush_work() unconditionally to effectively reverse the changes made by that commit. Also note that the problem which prompted commit 607b9df63057 is not reproducible any more on the affected machine. Fixes: 607b9df63057 ("ACPI: EC: PM: Avoid flushing EC work when EC GPE is inactive") Reported-by: Raymond Tan Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index fcddda3d67128c..548cdbf7cca167 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -2019,12 +2019,11 @@ bool acpi_ec_dispatch_gpe(void) * to allow the caller to process events properly after that. */ ret = acpi_dispatch_gpe(NULL, first_ec->gpe); - if (ret == ACPI_INTERRUPT_HANDLED) { + if (ret == ACPI_INTERRUPT_HANDLED) pm_pr_dbg("ACPI EC GPE dispatched\n"); - /* Flush the event and query workqueues. */ - acpi_ec_flush_work(); - } + /* Flush the event and query workqueues. */ + acpi_ec_flush_work(); return false; } From e0e9ce390d7bc6a705653d4a8aa4ea92c9a65e53 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 5 Oct 2020 19:13:46 +0200 Subject: [PATCH 60/65] ACPI: EC: PM: Drop ec_no_wakeup check from acpi_ec_dispatch_gpe() It turns out that in some cases there are EC events to flush in acpi_ec_dispatch_gpe() even though the ec_no_wakeup kernel parameter is set and the EC GPE is disabled while sleeping, so drop the ec_no_wakeup check that prevents those events from being processed from acpi_ec_dispatch_gpe(). Reported-by: Todd Brandt Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 548cdbf7cca167..e0cb1bcfffb293 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -2011,9 +2011,6 @@ bool acpi_ec_dispatch_gpe(void) if (acpi_any_gpe_status_set(first_ec->gpe)) return true; - if (ec_no_wakeup) - return false; - /* * Dispatch the EC GPE in-band, but do not report wakeup in any case * to allow the caller to process events properly after that. From 86836bac55f971995499978df4e62115d7baf5ef Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 Oct 2020 14:01:31 +0200 Subject: [PATCH 61/65] cpufreq: schedutil: Simplify sugov_fast_switch() Drop a redundant local variable definition from sugov_fast_switch() and rearrange the code in there to avoid the redundant logical negation. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- kernel/sched/cpufreq_schedutil.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 28f6d1ad608b77..5ae7b4e6e8d6e1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -114,12 +114,8 @@ static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, static void sugov_fast_switch(struct sugov_policy *sg_policy, u64 time, unsigned int next_freq) { - struct cpufreq_policy *policy = sg_policy->policy; - - if (!sugov_update_next_freq(sg_policy, time, next_freq)) - return; - - cpufreq_driver_fast_switch(policy, next_freq); + if (sugov_update_next_freq(sg_policy, time, next_freq)) + cpufreq_driver_fast_switch(sg_policy->policy, next_freq); } static void sugov_deferred_update(struct sugov_policy *sg_policy, u64 time, From efad4240da949fc3249015065b95d708b72ae670 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 Oct 2020 21:43:43 +0200 Subject: [PATCH 62/65] cpufreq: stats: Add memory barrier to store_reset() There is nothing to prevent the CPU or the compiler from reordering the writes to stats->reset_time and stats->reset_pending in store_reset(), in which case the readers of stats->reset_time may see a stale value. Moreover, on 32-bit arches the write to reset_time cannot be completed in one go, so the readers of it may see a partially updated value in that case. To prevent that from happening, add a write memory barrier between the writes to stats->reset_time and stats->reset_pending in store_reset() and corresponding read memory barrier in the readers of stats->reset_time. Fixes: 40c3bd4cfa6f ("cpufreq: stats: Defer stats update to cpufreq_stats_record_transition()") Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_stats.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 8e7d64f3404125..1b1389face8569 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -47,6 +47,11 @@ static void cpufreq_stats_reset_table(struct cpufreq_stats *stats) /* Adjust for the time elapsed since reset was requested */ WRITE_ONCE(stats->reset_pending, 0); + /* + * Prevent the reset_time read from being reordered before the + * reset_pending accesses in cpufreq_stats_record_transition(). + */ + smp_rmb(); cpufreq_stats_update(stats, READ_ONCE(stats->reset_time)); } @@ -71,10 +76,16 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) for (i = 0; i < stats->state_num; i++) { if (pending) { - if (i == stats->last_index) + if (i == stats->last_index) { + /* + * Prevent the reset_time read from occurring + * before the reset_pending read above. + */ + smp_rmb(); time = get_jiffies_64() - READ_ONCE(stats->reset_time); - else + } else { time = 0; + } } else { time = stats->time_in_state[i]; if (i == stats->last_index) @@ -99,6 +110,11 @@ static ssize_t store_reset(struct cpufreq_policy *policy, const char *buf, * avoid races. */ WRITE_ONCE(stats->reset_time, get_jiffies_64()); + /* + * The memory barrier below is to prevent the readers of reset_time from + * seeing a stale or partially updated value. + */ + smp_wmb(); WRITE_ONCE(stats->reset_pending, 1); return count; From a20b7053b5c47cd7de23288238ea4d23502f300a Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 24 Sep 2020 13:30:15 +0100 Subject: [PATCH 63/65] cpufreq,arm,arm64: restructure definitions of arch_set_freq_scale() Compared to other arch_* functions, arch_set_freq_scale() has an atypical weak definition that can be replaced by a strong architecture specific implementation. The more typical support for architectural functions involves defining an empty stub in a header file if the symbol is not already defined in architecture code. Some examples involve: - #define arch_scale_freq_capacity topology_get_freq_scale - #define arch_scale_freq_invariant topology_scale_freq_invariant - #define arch_scale_cpu_capacity topology_get_cpu_scale - #define arch_update_cpu_topology topology_update_cpu_topology - #define arch_scale_thermal_pressure topology_get_thermal_pressure - #define arch_set_thermal_pressure topology_set_thermal_pressure Bring arch_set_freq_scale() in line with these functions by renaming it to topology_set_freq_scale() in the arch topology driver, and by defining the arch_set_freq_scale symbol to point to the new function for arm and arm64. While there are other users of the arch_topology driver, this patch defines arch_set_freq_scale for arm and arm64 only, due to their existing definitions of arch_scale_freq_capacity. This is the getter function of the frequency invariance scale factor and without a getter function, the setter function - arch_set_freq_scale() has not purpose. Signed-off-by: Ionela Voinescu Acked-by: Viresh Kumar Acked-by: Catalin Marinas Acked-by: Sudeep Holla (BL_SWITCHER and topology parts) Signed-off-by: Rafael J. Wysocki --- arch/arm/include/asm/topology.h | 1 + arch/arm64/include/asm/topology.h | 1 + drivers/base/arch_topology.c | 4 ++-- drivers/cpufreq/cpufreq.c | 7 ------- include/linux/arch_topology.h | 2 ++ include/linux/cpufreq.h | 11 ++++++++--- 6 files changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index 9219e67befbe5d..e5e3d5ce4d55dd 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -8,6 +8,7 @@ #include /* Replace task scheduler's default frequency-invariant accounting */ +#define arch_set_freq_scale topology_set_freq_scale #define arch_scale_freq_capacity topology_get_freq_scale #define arch_scale_freq_invariant topology_scale_freq_invariant diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 7cb519473fbd8a..11a465243f660a 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -26,6 +26,7 @@ void topology_scale_freq_tick(void); #endif /* CONFIG_ARM64_AMU_EXTN */ /* Replace task scheduler's default frequency-invariant accounting */ +#define arch_set_freq_scale topology_set_freq_scale #define arch_scale_freq_capacity topology_get_freq_scale #define arch_scale_freq_invariant topology_scale_freq_invariant diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 89cae168b076bb..c1a9e2fb634ef3 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -33,8 +33,8 @@ __weak bool arch_freq_counters_available(const struct cpumask *cpus) } DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; -void arch_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, - unsigned long max_freq) +void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, + unsigned long max_freq) { unsigned long scale; int i; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index db00693a586a7a..1877f5e2e5b06c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -160,13 +160,6 @@ u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy) } EXPORT_SYMBOL_GPL(get_cpu_idle_time); -__weak void arch_set_freq_scale(const struct cpumask *cpus, - unsigned long cur_freq, - unsigned long max_freq) -{ -} -EXPORT_SYMBOL_GPL(arch_set_freq_scale); - /* * This is a generic cpufreq init() routine which can be used by cpufreq * drivers of SMP systems. It will do following: diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 083df331a3c9f4..0f6cd6b73a6137 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -30,6 +30,8 @@ static inline unsigned long topology_get_freq_scale(int cpu) return per_cpu(freq_scale, cpu); } +void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, + unsigned long max_freq); bool topology_scale_freq_invariant(void); bool arch_freq_counters_available(const struct cpumask *cpus); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9f779fbdbe7bb0..fa37b1c6644326 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1011,9 +1011,14 @@ static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy, extern void arch_freq_prepare_all(void); extern unsigned int arch_freq_get_on_cpu(int cpu); -extern void arch_set_freq_scale(const struct cpumask *cpus, - unsigned long cur_freq, - unsigned long max_freq); +#ifndef arch_set_freq_scale +static __always_inline +void arch_set_freq_scale(const struct cpumask *cpus, + unsigned long cur_freq, + unsigned long max_freq) +{ +} +#endif /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; From 6699e91c071715efefd7d5709755e557e948fdd9 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 24 Sep 2020 13:30:16 +0100 Subject: [PATCH 64/65] arm: disable frequency invariance for CONFIG_BL_SWITCHER big.LITTLE switching complicates the setting of a correct cpufreq-based frequency invariance scale factor due to (as observed in drivers/cpufreq/vexpress-spc-cpufreq.c): - Incorrect current and maximum frequencies as a result of the exposure of a virtual frequency table to the cpufreq core, - Missed updates as a result of asynchronous frequency adjustments caused by frequency changes in other CPU pairs. Given that its functionality is atypical in regards to frequency invariance and this is an old technology, disable frequency invariance for when big.LITTLE switching is configured in to prevent incorrect scale setting. Signed-off-by: Ionela Voinescu Suggested-by: Dietmar Eggemann Acked-by: Viresh Kumar Acked-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- arch/arm/include/asm/topology.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index e5e3d5ce4d55dd..470299ee2fbaaf 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -7,10 +7,13 @@ #include #include +/* big.LITTLE switcher is incompatible with frequency invariance */ +#ifndef CONFIG_BL_SWITCHER /* Replace task scheduler's default frequency-invariant accounting */ #define arch_set_freq_scale topology_set_freq_scale #define arch_scale_freq_capacity topology_get_freq_scale #define arch_scale_freq_invariant topology_scale_freq_invariant +#endif /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale From b7af6080a3d2d35a410db3eeb162e1ab49c4d27f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 12 Oct 2020 10:20:07 +0530 Subject: [PATCH 65/65] cpufreq: stats: Fix string format specifier mismatch Fix following warning: drivers/cpufreq/cpufreq_stats.c:63:10: warning: %d in format string (no. 1) requires 'int' but the argument type is 'unsigned int' Fixes: 40c3bd4cfa6f ("cpufreq: stats: Defer stats update to cpufreq_stats_record_transition()") Reported-by: kernel test robot Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 1b1389face8569..6cd5c8ab5d49f6 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -62,7 +62,7 @@ static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf) if (READ_ONCE(stats->reset_pending)) return sprintf(buf, "%d\n", 0); else - return sprintf(buf, "%d\n", stats->total_trans); + return sprintf(buf, "%u\n", stats->total_trans); } cpufreq_freq_attr_ro(total_trans);