forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fpga: dfl: fme: add thermal management support
This patch adds support to thermal management private feature for DFL FPGA Management Engine (FME). This private feature driver registers a hwmon for thermal/temperature monitoring (hwmon temp1_input). If hardware automatic throttling is supported by this hardware, then driver also exposes sysfs interfaces under hwmon for thresholds (temp1_max/ crit/ emergency), threshold alarms (temp1_max_alarm/ temp1_crit_alarm) and throttling policy (temp1_max_policy). Signed-off-by: Luwei Kang <[email protected]> Signed-off-by: Russ Weight <[email protected]> Signed-off-by: Xu Yilun <[email protected]> Signed-off-by: Wu Hao <[email protected]> Acked-by: Guenter Roeck <[email protected]> Reviewed-by: Moritz Fischer <[email protected]> Signed-off-by: Moritz Fischer <[email protected]>
- Loading branch information
Showing
3 changed files
with
243 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -106,3 +106,67 @@ KernelVersion: 5.4 | |
Contact: Wu Hao <[email protected]> | ||
Description: Read-only. Read this file to get the second error detected by | ||
hardware. | ||
|
||
What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/name | ||
Date: October 2019 | ||
KernelVersion: 5.5 | ||
Contact: Wu Hao <[email protected]> | ||
Description: Read-Only. Read this file to get the name of hwmon device, it | ||
supports values: | ||
'dfl_fme_thermal' - thermal hwmon device name | ||
|
||
What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_input | ||
Date: October 2019 | ||
KernelVersion: 5.5 | ||
Contact: Wu Hao <[email protected]> | ||
Description: Read-Only. It returns FPGA device temperature in millidegrees | ||
Celsius. | ||
|
||
What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_max | ||
Date: October 2019 | ||
KernelVersion: 5.5 | ||
Contact: Wu Hao <[email protected]> | ||
Description: Read-Only. It returns hardware threshold1 temperature in | ||
millidegrees Celsius. If temperature rises at or above this | ||
threshold, hardware starts 50% or 90% throttling (see | ||
'temp1_max_policy'). | ||
|
||
What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_crit | ||
Date: October 2019 | ||
KernelVersion: 5.5 | ||
Contact: Wu Hao <[email protected]> | ||
Description: Read-Only. It returns hardware threshold2 temperature in | ||
millidegrees Celsius. If temperature rises at or above this | ||
threshold, hardware starts 100% throttling. | ||
|
||
What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_emergency | ||
Date: October 2019 | ||
KernelVersion: 5.5 | ||
Contact: Wu Hao <[email protected]> | ||
Description: Read-Only. It returns hardware trip threshold temperature in | ||
millidegrees Celsius. If temperature rises at or above this | ||
threshold, a fatal event will be triggered to board management | ||
controller (BMC) to shutdown FPGA. | ||
|
||
What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_max_alarm | ||
Date: October 2019 | ||
KernelVersion: 5.5 | ||
Contact: Wu Hao <[email protected]> | ||
Description: Read-only. It returns 1 if temperature is currently at or above | ||
hardware threshold1 (see 'temp1_max'), otherwise 0. | ||
|
||
What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_crit_alarm | ||
Date: October 2019 | ||
KernelVersion: 5.5 | ||
Contact: Wu Hao <[email protected]> | ||
Description: Read-only. It returns 1 if temperature is currently at or above | ||
hardware threshold2 (see 'temp1_crit'), otherwise 0. | ||
|
||
What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_max_policy | ||
Date: October 2019 | ||
KernelVersion: 5.5 | ||
Contact: Wu Hao <[email protected]> | ||
Description: Read-Only. Read this file to get the policy of hardware threshold1 | ||
(see 'temp1_max'). It only supports two values (policies): | ||
0 - AP2 state (90% throttling) | ||
1 - AP1 state (50% throttling) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,8 @@ | |
* Henry Mitchel <[email protected]> | ||
*/ | ||
|
||
#include <linux/hwmon.h> | ||
#include <linux/hwmon-sysfs.h> | ||
#include <linux/kernel.h> | ||
#include <linux/module.h> | ||
#include <linux/uaccess.h> | ||
|
@@ -181,6 +183,178 @@ static const struct dfl_feature_ops fme_hdr_ops = { | |
.ioctl = fme_hdr_ioctl, | ||
}; | ||
|
||
#define FME_THERM_THRESHOLD 0x8 | ||
#define TEMP_THRESHOLD1 GENMASK_ULL(6, 0) | ||
#define TEMP_THRESHOLD1_EN BIT_ULL(7) | ||
#define TEMP_THRESHOLD2 GENMASK_ULL(14, 8) | ||
#define TEMP_THRESHOLD2_EN BIT_ULL(15) | ||
#define TRIP_THRESHOLD GENMASK_ULL(30, 24) | ||
#define TEMP_THRESHOLD1_STATUS BIT_ULL(32) /* threshold1 reached */ | ||
#define TEMP_THRESHOLD2_STATUS BIT_ULL(33) /* threshold2 reached */ | ||
/* threshold1 policy: 0 - AP2 (90% throttle) / 1 - AP1 (50% throttle) */ | ||
#define TEMP_THRESHOLD1_POLICY BIT_ULL(44) | ||
|
||
#define FME_THERM_RDSENSOR_FMT1 0x10 | ||
#define FPGA_TEMPERATURE GENMASK_ULL(6, 0) | ||
|
||
#define FME_THERM_CAP 0x20 | ||
#define THERM_NO_THROTTLE BIT_ULL(0) | ||
|
||
#define MD_PRE_DEG | ||
|
||
static bool fme_thermal_throttle_support(void __iomem *base) | ||
{ | ||
u64 v = readq(base + FME_THERM_CAP); | ||
|
||
return FIELD_GET(THERM_NO_THROTTLE, v) ? false : true; | ||
} | ||
|
||
static umode_t thermal_hwmon_attrs_visible(const void *drvdata, | ||
enum hwmon_sensor_types type, | ||
u32 attr, int channel) | ||
{ | ||
const struct dfl_feature *feature = drvdata; | ||
|
||
/* temperature is always supported, and check hardware cap for others */ | ||
if (attr == hwmon_temp_input) | ||
return 0444; | ||
|
||
return fme_thermal_throttle_support(feature->ioaddr) ? 0444 : 0; | ||
} | ||
|
||
static int thermal_hwmon_read(struct device *dev, enum hwmon_sensor_types type, | ||
u32 attr, int channel, long *val) | ||
{ | ||
struct dfl_feature *feature = dev_get_drvdata(dev); | ||
u64 v; | ||
|
||
switch (attr) { | ||
case hwmon_temp_input: | ||
v = readq(feature->ioaddr + FME_THERM_RDSENSOR_FMT1); | ||
*val = (long)(FIELD_GET(FPGA_TEMPERATURE, v) * 1000); | ||
break; | ||
case hwmon_temp_max: | ||
v = readq(feature->ioaddr + FME_THERM_THRESHOLD); | ||
*val = (long)(FIELD_GET(TEMP_THRESHOLD1, v) * 1000); | ||
break; | ||
case hwmon_temp_crit: | ||
v = readq(feature->ioaddr + FME_THERM_THRESHOLD); | ||
*val = (long)(FIELD_GET(TEMP_THRESHOLD2, v) * 1000); | ||
break; | ||
case hwmon_temp_emergency: | ||
v = readq(feature->ioaddr + FME_THERM_THRESHOLD); | ||
*val = (long)(FIELD_GET(TRIP_THRESHOLD, v) * 1000); | ||
break; | ||
case hwmon_temp_max_alarm: | ||
v = readq(feature->ioaddr + FME_THERM_THRESHOLD); | ||
*val = (long)FIELD_GET(TEMP_THRESHOLD1_STATUS, v); | ||
break; | ||
case hwmon_temp_crit_alarm: | ||
v = readq(feature->ioaddr + FME_THERM_THRESHOLD); | ||
*val = (long)FIELD_GET(TEMP_THRESHOLD2_STATUS, v); | ||
break; | ||
default: | ||
return -EOPNOTSUPP; | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
static const struct hwmon_ops thermal_hwmon_ops = { | ||
.is_visible = thermal_hwmon_attrs_visible, | ||
.read = thermal_hwmon_read, | ||
}; | ||
|
||
static const struct hwmon_channel_info *thermal_hwmon_info[] = { | ||
HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_EMERGENCY | | ||
HWMON_T_MAX | HWMON_T_MAX_ALARM | | ||
HWMON_T_CRIT | HWMON_T_CRIT_ALARM), | ||
NULL | ||
}; | ||
|
||
static const struct hwmon_chip_info thermal_hwmon_chip_info = { | ||
.ops = &thermal_hwmon_ops, | ||
.info = thermal_hwmon_info, | ||
}; | ||
|
||
static ssize_t temp1_max_policy_show(struct device *dev, | ||
struct device_attribute *attr, char *buf) | ||
{ | ||
struct dfl_feature *feature = dev_get_drvdata(dev); | ||
u64 v; | ||
|
||
v = readq(feature->ioaddr + FME_THERM_THRESHOLD); | ||
|
||
return sprintf(buf, "%u\n", | ||
(unsigned int)FIELD_GET(TEMP_THRESHOLD1_POLICY, v)); | ||
} | ||
|
||
static DEVICE_ATTR_RO(temp1_max_policy); | ||
|
||
static struct attribute *thermal_extra_attrs[] = { | ||
&dev_attr_temp1_max_policy.attr, | ||
NULL, | ||
}; | ||
|
||
static umode_t thermal_extra_attrs_visible(struct kobject *kobj, | ||
struct attribute *attr, int index) | ||
{ | ||
struct device *dev = kobj_to_dev(kobj); | ||
struct dfl_feature *feature = dev_get_drvdata(dev); | ||
|
||
return fme_thermal_throttle_support(feature->ioaddr) ? attr->mode : 0; | ||
} | ||
|
||
static const struct attribute_group thermal_extra_group = { | ||
.attrs = thermal_extra_attrs, | ||
.is_visible = thermal_extra_attrs_visible, | ||
}; | ||
__ATTRIBUTE_GROUPS(thermal_extra); | ||
|
||
static int fme_thermal_mgmt_init(struct platform_device *pdev, | ||
struct dfl_feature *feature) | ||
{ | ||
struct device *hwmon; | ||
|
||
/* | ||
* create hwmon to allow userspace monitoring temperature and other | ||
* threshold information. | ||
* | ||
* temp1_input -> FPGA device temperature | ||
* temp1_max -> hardware threshold 1 -> 50% or 90% throttling | ||
* temp1_crit -> hardware threshold 2 -> 100% throttling | ||
* temp1_emergency -> hardware trip_threshold to shutdown FPGA | ||
* temp1_max_alarm -> hardware threshold 1 alarm | ||
* temp1_crit_alarm -> hardware threshold 2 alarm | ||
* | ||
* create device specific sysfs interfaces, e.g. read temp1_max_policy | ||
* to understand the actual hardware throttling action (50% vs 90%). | ||
* | ||
* If hardware doesn't support automatic throttling per thresholds, | ||
* then all above sysfs interfaces are not visible except temp1_input | ||
* for temperature. | ||
*/ | ||
hwmon = devm_hwmon_device_register_with_info(&pdev->dev, | ||
"dfl_fme_thermal", feature, | ||
&thermal_hwmon_chip_info, | ||
thermal_extra_groups); | ||
if (IS_ERR(hwmon)) { | ||
dev_err(&pdev->dev, "Fail to register thermal hwmon\n"); | ||
return PTR_ERR(hwmon); | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
static const struct dfl_feature_id fme_thermal_mgmt_id_table[] = { | ||
{.id = FME_FEATURE_ID_THERMAL_MGMT,}, | ||
{0,} | ||
}; | ||
|
||
static const struct dfl_feature_ops fme_thermal_mgmt_ops = { | ||
.init = fme_thermal_mgmt_init, | ||
}; | ||
|
||
static struct dfl_feature_driver fme_feature_drvs[] = { | ||
{ | ||
.id_table = fme_hdr_id_table, | ||
|
@@ -194,6 +368,10 @@ static struct dfl_feature_driver fme_feature_drvs[] = { | |
.id_table = fme_global_err_id_table, | ||
.ops = &fme_global_err_ops, | ||
}, | ||
{ | ||
.id_table = fme_thermal_mgmt_id_table, | ||
.ops = &fme_thermal_mgmt_ops, | ||
}, | ||
{ | ||
.ops = NULL, | ||
}, | ||
|