diff --git a/Documentation/devicetree/bindings/opp/kryo-cpufreq.txt b/Documentation/devicetree/bindings/opp/kryo-cpufreq.txt new file mode 100644 index 00000000000000..c2127b96805a3e --- /dev/null +++ b/Documentation/devicetree/bindings/opp/kryo-cpufreq.txt @@ -0,0 +1,680 @@ +Qualcomm Technologies, Inc. KRYO CPUFreq and OPP bindings +=================================== + +In Certain Qualcomm Technologies, Inc. SoCs like apq8096 and msm8996 +that have KRYO processors, the CPU ferequencies subset and voltage value +of each OPP varies based on the silicon variant in use. +Qualcomm Technologies, Inc. Process Voltage Scaling Tables +defines the voltage and frequency value based on the msm-id in SMEM +and speedbin blown in the efuse combination. +The qcom-cpufreq-kryo driver reads the msm-id and efuse value from the SoC +to provide the OPP framework with required information (existing HW bitmap). +This is used to determine the voltage and frequency value for each OPP of +operating-points-v2 table when it is parsed by the OPP framework. + +Required properties: +-------------------- +In 'cpus' nodes: +- operating-points-v2: Phandle to the operating-points-v2 table to use. + +In 'operating-points-v2' table: +- compatible: Should be + - 'operating-points-v2-kryo-cpu' for apq8096 and msm8996. +- nvmem-cells: A phandle pointing to a nvmem-cells node representing the + efuse registers that has information about the + speedbin that is used to select the right frequency/voltage + value pair. + Please refer the for nvmem-cells + bindings Documentation/devicetree/bindings/nvmem/nvmem.txt + and also examples below. + +In every OPP node: +- opp-supported-hw: A single 32 bit bitmap value, representing compatible HW. + Bitmap: + 0: MSM8996 V3, speedbin 0 + 1: MSM8996 V3, speedbin 1 + 2: MSM8996 V3, speedbin 2 + 3: unused + 4: MSM8996 SG, speedbin 0 + 5: MSM8996 SG, speedbin 1 + 6: MSM8996 SG, speedbin 2 + 7-31: unused + +Example 1: +--------- + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + CPU0: cpu@0 { + device_type = "cpu"; + compatible = "qcom,kryo"; + reg = <0x0 0x0>; + enable-method = "psci"; + clocks = <&kryocc 0>; + cpu-supply = <&pm8994_s11_saw>; + operating-points-v2 = <&cluster0_opp>; + #cooling-cells = <2>; + next-level-cache = <&L2_0>; + L2_0: l2-cache { + compatible = "cache"; + cache-level = <2>; + }; + }; + + CPU1: cpu@1 { + device_type = "cpu"; + compatible = "qcom,kryo"; + reg = <0x0 0x1>; + enable-method = "psci"; + clocks = <&kryocc 0>; + cpu-supply = <&pm8994_s11_saw>; + operating-points-v2 = <&cluster0_opp>; + #cooling-cells = <2>; + next-level-cache = <&L2_0>; + }; + + CPU2: cpu@100 { + device_type = "cpu"; + compatible = "qcom,kryo"; + reg = <0x0 0x100>; + enable-method = "psci"; + clocks = <&kryocc 1>; + cpu-supply = <&pm8994_s11_saw>; + operating-points-v2 = <&cluster1_opp>; + #cooling-cells = <2>; + next-level-cache = <&L2_1>; + L2_1: l2-cache { + compatible = "cache"; + cache-level = <2>; + }; + }; + + CPU3: cpu@101 { + device_type = "cpu"; + compatible = "qcom,kryo"; + reg = <0x0 0x101>; + enable-method = "psci"; + clocks = <&kryocc 1>; + cpu-supply = <&pm8994_s11_saw>; + operating-points-v2 = <&cluster1_opp>; + #cooling-cells = <2>; + next-level-cache = <&L2_1>; + }; + + cpu-map { + cluster0 { + core0 { + cpu = <&CPU0>; + }; + + core1 { + cpu = <&CPU1>; + }; + }; + + cluster1 { + core0 { + cpu = <&CPU2>; + }; + + core1 { + cpu = <&CPU3>; + }; + }; + }; + }; + + cluster0_opp: opp_table0 { + compatible = "operating-points-v2-kryo-cpu"; + nvmem-cells = <&speedbin_efuse>; + opp-shared; + + opp-307200000 { + opp-hz = /bits/ 64 <307200000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x77>; + clock-latency-ns = <200000>; + }; + opp-384000000 { + opp-hz = /bits/ 64 <384000000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-422400000 { + opp-hz = /bits/ 64 <422400000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-460800000 { + opp-hz = /bits/ 64 <460800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-480000000 { + opp-hz = /bits/ 64 <480000000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-537600000 { + opp-hz = /bits/ 64 <537600000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-556800000 { + opp-hz = /bits/ 64 <556800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-614400000 { + opp-hz = /bits/ 64 <614400000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-652800000 { + opp-hz = /bits/ 64 <652800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-691200000 { + opp-hz = /bits/ 64 <691200000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-729600000 { + opp-hz = /bits/ 64 <729600000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-768000000 { + opp-hz = /bits/ 64 <768000000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-844800000 { + opp-hz = /bits/ 64 <844800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x77>; + clock-latency-ns = <200000>; + }; + opp-902400000 { + opp-hz = /bits/ 64 <902400000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-960000000 { + opp-hz = /bits/ 64 <960000000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-979200000 { + opp-hz = /bits/ 64 <979200000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1036800000 { + opp-hz = /bits/ 64 <1036800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1056000000 { + opp-hz = /bits/ 64 <1056000000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1113600000 { + opp-hz = /bits/ 64 <1113600000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1132800000 { + opp-hz = /bits/ 64 <1132800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1190400000 { + opp-hz = /bits/ 64 <1190400000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1209600000 { + opp-hz = /bits/ 64 <1209600000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1228800000 { + opp-hz = /bits/ 64 <1228800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1286400000 { + opp-hz = /bits/ 64 <1286400000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1324800000 { + opp-hz = /bits/ 64 <1324800000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x5>; + clock-latency-ns = <200000>; + }; + opp-1363200000 { + opp-hz = /bits/ 64 <1363200000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x72>; + clock-latency-ns = <200000>; + }; + opp-1401600000 { + opp-hz = /bits/ 64 <1401600000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x5>; + clock-latency-ns = <200000>; + }; + opp-1440000000 { + opp-hz = /bits/ 64 <1440000000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1478400000 { + opp-hz = /bits/ 64 <1478400000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x1>; + clock-latency-ns = <200000>; + }; + opp-1497600000 { + opp-hz = /bits/ 64 <1497600000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x4>; + clock-latency-ns = <200000>; + }; + opp-1516800000 { + opp-hz = /bits/ 64 <1516800000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1593600000 { + opp-hz = /bits/ 64 <1593600000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x71>; + clock-latency-ns = <200000>; + }; + opp-1996800000 { + opp-hz = /bits/ 64 <1996800000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x20>; + clock-latency-ns = <200000>; + }; + opp-2188800000 { + opp-hz = /bits/ 64 <2188800000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x10>; + clock-latency-ns = <200000>; + }; + }; + + cluster1_opp: opp_table1 { + compatible = "operating-points-v2-kryo-cpu"; + nvmem-cells = <&speedbin_efuse>; + opp-shared; + + opp-307200000 { + opp-hz = /bits/ 64 <307200000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x77>; + clock-latency-ns = <200000>; + }; + opp-384000000 { + opp-hz = /bits/ 64 <384000000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-403200000 { + opp-hz = /bits/ 64 <403200000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-460800000 { + opp-hz = /bits/ 64 <460800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-480000000 { + opp-hz = /bits/ 64 <480000000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-537600000 { + opp-hz = /bits/ 64 <537600000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-556800000 { + opp-hz = /bits/ 64 <556800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-614400000 { + opp-hz = /bits/ 64 <614400000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-652800000 { + opp-hz = /bits/ 64 <652800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-691200000 { + opp-hz = /bits/ 64 <691200000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-729600000 { + opp-hz = /bits/ 64 <729600000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-748800000 { + opp-hz = /bits/ 64 <748800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-806400000 { + opp-hz = /bits/ 64 <806400000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-825600000 { + opp-hz = /bits/ 64 <825600000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-883200000 { + opp-hz = /bits/ 64 <883200000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-902400000 { + opp-hz = /bits/ 64 <902400000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-940800000 { + opp-hz = /bits/ 64 <940800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-979200000 { + opp-hz = /bits/ 64 <979200000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1036800000 { + opp-hz = /bits/ 64 <1036800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1056000000 { + opp-hz = /bits/ 64 <1056000000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1113600000 { + opp-hz = /bits/ 64 <1113600000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1132800000 { + opp-hz = /bits/ 64 <1132800000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1190400000 { + opp-hz = /bits/ 64 <1190400000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1209600000 { + opp-hz = /bits/ 64 <1209600000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1248000000 { + opp-hz = /bits/ 64 <1248000000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1286400000 { + opp-hz = /bits/ 64 <1286400000>; + opp-microvolt = <905000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1324800000 { + opp-hz = /bits/ 64 <1324800000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1363200000 { + opp-hz = /bits/ 64 <1363200000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1401600000 { + opp-hz = /bits/ 64 <1401600000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1440000000 { + opp-hz = /bits/ 64 <1440000000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1478400000 { + opp-hz = /bits/ 64 <1478400000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1516800000 { + opp-hz = /bits/ 64 <1516800000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1555200000 { + opp-hz = /bits/ 64 <1555200000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1593600000 { + opp-hz = /bits/ 64 <1593600000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1632000000 { + opp-hz = /bits/ 64 <1632000000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1670400000 { + opp-hz = /bits/ 64 <1670400000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1708800000 { + opp-hz = /bits/ 64 <1708800000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1747200000 { + opp-hz = /bits/ 64 <1747200000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x70>; + clock-latency-ns = <200000>; + }; + opp-1785600000 { + opp-hz = /bits/ 64 <1785600000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1804800000 { + opp-hz = /bits/ 64 <1804800000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x6>; + clock-latency-ns = <200000>; + }; + opp-1824000000 { + opp-hz = /bits/ 64 <1824000000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x71>; + clock-latency-ns = <200000>; + }; + opp-1900800000 { + opp-hz = /bits/ 64 <1900800000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x74>; + clock-latency-ns = <200000>; + }; + opp-1920000000 { + opp-hz = /bits/ 64 <1920000000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x1>; + clock-latency-ns = <200000>; + }; + opp-1977600000 { + opp-hz = /bits/ 64 <1977600000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x30>; + clock-latency-ns = <200000>; + }; + opp-1996800000 { + opp-hz = /bits/ 64 <1996800000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x1>; + clock-latency-ns = <200000>; + }; + opp-2054400000 { + opp-hz = /bits/ 64 <2054400000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x30>; + clock-latency-ns = <200000>; + }; + opp-2073600000 { + opp-hz = /bits/ 64 <2073600000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x1>; + clock-latency-ns = <200000>; + }; + opp-2150400000 { + opp-hz = /bits/ 64 <2150400000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x31>; + clock-latency-ns = <200000>; + }; + opp-2246400000 { + opp-hz = /bits/ 64 <2246400000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x10>; + clock-latency-ns = <200000>; + }; + opp-2342400000 { + opp-hz = /bits/ 64 <2342400000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x10>; + clock-latency-ns = <200000>; + }; + }; + +.... + +reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; +.... + smem_mem: smem-mem@86000000 { + reg = <0x0 0x86000000 0x0 0x200000>; + no-map; + }; +.... +}; + +smem { + compatible = "qcom,smem"; + memory-region = <&smem_mem>; + hwlocks = <&tcsr_mutex 3>; +}; + +soc { +.... + qfprom: qfprom@74000 { + compatible = "qcom,qfprom"; + reg = <0x00074000 0x8ff>; + #address-cells = <1>; + #size-cells = <1>; + .... + speedbin_efuse: speedbin@133 { + reg = <0x133 0x1>; + bits = <5 3>; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 4e4f30288c8bb7..c396c4c0af92db 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -82,7 +82,10 @@ This defines voltage-current-frequency combinations along with other related properties. Required properties: -- opp-hz: Frequency in Hz, expressed as a 64-bit big-endian integer. +- opp-hz: Frequency in Hz, expressed as a 64-bit big-endian integer. This is a + required property for all device nodes but devices like power domains. The + power domain nodes must have another (implementation dependent) property which + uniquely identifies the OPP nodes. Optional properties: - opp-microvolt: voltage in micro Volts. @@ -159,7 +162,7 @@ Optional properties: - status: Marks the node enabled/disabled. -- required-opp: This contains phandle to an OPP node in another device's OPP +- required-opps: This contains phandle to an OPP node in another device's OPP table. It may contain an array of phandles, where each phandle points to an OPP of a different device. It should not contain multiple phandles to the OPP nodes in the same OPP table. This specifies the minimum required OPP of the diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt index f3355313c020f7..4733f76cbe48da 100644 --- a/Documentation/devicetree/bindings/power/power_domain.txt +++ b/Documentation/devicetree/bindings/power/power_domain.txt @@ -127,7 +127,7 @@ inside a PM domain with index 0 of a power controller represented by a node with the label "power". Optional properties: -- required-opp: This contains phandle to an OPP node in another device's OPP +- required-opps: This contains phandle to an OPP node in another device's OPP table. It may contain an array of phandles, where each phandle points to an OPP of a different device. It should not contain multiple phandles to the OPP nodes in the same OPP table. This specifies the minimum required OPP of the @@ -175,14 +175,14 @@ Example: compatible = "foo,i-leak-current"; reg = <0x12350000 0x1000>; power-domains = <&power 0>; - required-opp = <&domain0_opp_0>; + required-opps = <&domain0_opp_0>; }; leaky-device1@12350000 { compatible = "foo,i-leak-current"; reg = <0x12350000 0x1000>; power-domains = <&power 1>; - required-opp = <&domain1_opp_1>; + required-opps = <&domain1_opp_1>; }; [1]. Documentation/devicetree/bindings/power/domain-idle-state.txt diff --git a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt index 4a4766e9c254e1..e66fd4eab71cba 100644 --- a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt +++ b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt @@ -31,6 +31,8 @@ SoC is on the same page. Required properties: - compatible: should be one of: + - "rockchip,px30-io-voltage-domain" for px30 + - "rockchip,px30-pmu-io-voltage-domain" for px30 pmu-domains - "rockchip,rk3188-io-voltage-domain" for rk3188 - "rockchip,rk3228-io-voltage-domain" for rk3228 - "rockchip,rk3288-io-voltage-domain" for rk3288 @@ -51,6 +53,19 @@ a phandle the relevant regulator. All specified supplies must be able to report their voltage. The IO Voltage Domain for any non-specified supplies will be not be touched. +Possible supplies for PX30: +- vccio6-supply: The supply connected to VCCIO6. +- vccio1-supply: The supply connected to VCCIO1. +- vccio2-supply: The supply connected to VCCIO2. +- vccio3-supply: The supply connected to VCCIO3. +- vccio4-supply: The supply connected to VCCIO4. +- vccio5-supply: The supply connected to VCCIO5. +- vccio-oscgpi-supply: The supply connected to VCCIO_OSCGPI. + +Possible supplies for PX30 pmu-domains: +- pmuio1-supply: The supply connected to PMUIO1. +- pmuio2-supply: The supply connected to PMUIO2. + Possible supplies for rk3188: - ap0-supply: The supply connected to AP0_VCC. - ap1-supply: The supply connected to AP1_VCC. diff --git a/MAINTAINERS b/MAINTAINERS index c3e2e7076a951c..a2254aced8fa2e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11663,6 +11663,13 @@ F: Documentation/devicetree/bindings/media/qcom,camss.txt F: Documentation/media/v4l-drivers/qcom_camss.rst F: drivers/media/platform/qcom/camss-8x16/ +QUALCOMM CPUFREQ DRIVER MSM8996/APQ8096 +M: Ilia Lin +L: linux-pm@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/opp/kryo-cpufreq.txt +F: drivers/cpufreq/qcom-cpufreq-kryo.c + QUALCOMM EMAC GIGABIT ETHERNET DRIVER M: Timur Tabi L: netdev@vger.kernel.org @@ -14380,6 +14387,15 @@ S: Maintained F: drivers/tc/ F: include/linux/tc.h +TURBOSTAT UTILITY +M: "Len Brown" +L: linux-pm@vger.kernel.org +B: https://bugzilla.kernel.org +Q: https://patchwork.kernel.org/project/linux-pm/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat +S: Supported +F: tools/power/x86/turbostat/ + TW5864 VIDEO4LINUX DRIVER M: Bluecherry Maintainers M: Anton Sviridenko diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index fda2114197b369..68b2c3150de153 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -62,8 +62,8 @@ #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) -#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) -#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C1_AUTO_UNDEMOTE (1UL << 28) #define MSR_MTRRcap 0x000000fe diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c index abbd59063906c5..e10fec99a182ec 100644 --- a/drivers/acpi/acpica/evgpe.c +++ b/drivers/acpi/acpica/evgpe.c @@ -634,6 +634,12 @@ acpi_ev_detect_gpe(struct acpi_namespace_node *gpe_device, flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock); + if (!gpe_event_info) { + gpe_event_info = acpi_ev_get_gpe_event_info(gpe_device, gpe_number); + if (!gpe_event_info) + goto error_exit; + } + /* Get the info block for the entire GPE register */ gpe_register_info = gpe_event_info->register_info; diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c index c80e3bdf480570..b2d5f66cc1b055 100644 --- a/drivers/acpi/acpica/evxfgpe.c +++ b/drivers/acpi/acpica/evxfgpe.c @@ -637,6 +637,28 @@ acpi_get_gpe_status(acpi_handle gpe_device, ACPI_EXPORT_SYMBOL(acpi_get_gpe_status) +/******************************************************************************* + * + * FUNCTION: acpi_gispatch_gpe + * + * PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1 + * gpe_number - GPE level within the GPE block + * + * RETURN: None + * + * DESCRIPTION: Detect and dispatch a General Purpose Event to either a function + * (e.g. EC) or method (e.g. _Lxx/_Exx) handler. + * + ******************************************************************************/ +void acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number) +{ + ACPI_FUNCTION_TRACE(acpi_dispatch_gpe); + + acpi_ev_detect_gpe(gpe_device, NULL, gpe_number); +} + +ACPI_EXPORT_SYMBOL(acpi_dispatch_gpe) + /******************************************************************************* * * FUNCTION: acpi_finish_gpe diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 3d96e4da2d98c9..a7c2673ffd36e8 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1257,10 +1257,7 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on) struct acpi_device *adev = ACPI_COMPANION(dev); if (!adev) - return -ENODEV; - - if (dev->pm_domain) - return -EEXIST; + return 0; /* * Only attach the power domain to the first device if the @@ -1268,7 +1265,7 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on) * management twice. */ if (!acpi_device_is_first_physical_node(adev, dev)) - return -EBUSY; + return 0; acpi_add_pm_notifier(adev, dev, acpi_pm_notify_work_func); dev_pm_domain_set(dev, &acpi_general_pm_domain); @@ -1278,7 +1275,7 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on) } dev->pm_domain->detach = acpi_dev_pm_detach; - return 0; + return 1; } EXPORT_SYMBOL_GPL(acpi_dev_pm_attach); #endif /* CONFIG_PM */ diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 30a5729565575f..bb94cf0731feb9 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1034,6 +1034,12 @@ void acpi_ec_unblock_transactions(void) acpi_ec_start(first_ec, true); } +void acpi_ec_dispatch_gpe(void) +{ + if (first_ec) + acpi_dispatch_gpe(NULL, first_ec->gpe); +} + /* -------------------------------------------------------------------------- Event Management -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 1d0a501bc7f0cb..530a3f67549049 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -188,6 +188,7 @@ int acpi_ec_ecdt_probe(void); int acpi_ec_dsdt_probe(void); void acpi_ec_block_transactions(void); void acpi_ec_unblock_transactions(void); +void acpi_ec_dispatch_gpe(void); int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, acpi_handle handle, acpi_ec_query_func func, void *data); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 974e58457697fc..5d0486f1cfcd5e 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -989,6 +989,13 @@ static void acpi_s2idle_wake(void) !irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) { pm_system_cancel_wakeup(); s2idle_wakeup = true; + /* + * On some platforms with the LPS0 _DSM device noirq resume + * takes too much time for EC wakeup events to survive, so look + * for them now. + */ + if (lps0_device_handle) + acpi_ec_dispatch_gpe(); } } diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 3b0118786b4369..0cf51d57e4ae87 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -251,7 +251,7 @@ static int amba_probe(struct device *dev) break; ret = dev_pm_domain_attach(dev, true); - if (ret == -EPROBE_DEFER) + if (ret) break; ret = amba_get_enable_pclk(pcdev); @@ -378,7 +378,7 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent) } ret = dev_pm_domain_attach(&dev->dev, true); - if (ret == -EPROBE_DEFER) { + if (ret) { iounmap(tmp); goto err_release; } diff --git a/drivers/base/base.h b/drivers/base/base.h index d800de650fa56b..a75c3025fb7886 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -161,3 +161,6 @@ extern void device_links_driver_cleanup(struct device *dev); extern void device_links_no_driver(struct device *dev); extern bool device_links_busy(struct device *dev); extern void device_links_unbind_consumers(struct device *dev); + +/* device pm support */ +void device_pm_move_to_tail(struct device *dev); diff --git a/drivers/base/core.c b/drivers/base/core.c index b610816eb887f9..ad7b50897bcc70 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -144,6 +144,26 @@ static int device_reorder_to_tail(struct device *dev, void *not_used) return 0; } +/** + * device_pm_move_to_tail - Move set of devices to the end of device lists + * @dev: Device to move + * + * This is a device_reorder_to_tail() wrapper taking the requisite locks. + * + * It moves the @dev along with all of its children and all of its consumers + * to the ends of the device_kset and dpm_list, recursively. + */ +void device_pm_move_to_tail(struct device *dev) +{ + int idx; + + idx = device_links_read_lock(); + device_pm_lock(); + device_reorder_to_tail(dev, NULL); + device_pm_unlock(); + device_links_read_unlock(idx); +} + /** * device_link_add - Create a link between two devices. * @consumer: Consumer end of the link. diff --git a/drivers/base/dd.c b/drivers/base/dd.c index c9f54089429bde..a41c91bfac0e9c 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -122,9 +122,7 @@ static void deferred_probe_work_func(struct work_struct *work) * the list is a good order for suspend but deferred * probe makes that very unsafe. */ - device_pm_lock(); - device_pm_move_last(dev); - device_pm_unlock(); + device_pm_move_to_tail(dev); dev_dbg(dev, "Retrying from deferred list\n"); if (initcall_debug && !initcalls_done) @@ -582,7 +580,7 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) pr_debug("bus: '%s': %s: matched device %s with driver %s\n", drv->bus->name, __func__, dev_name(dev), drv->name); - pm_runtime_get_suppliers(dev); + pm_runtime_resume_suppliers(dev); if (dev->parent) pm_runtime_get_sync(dev->parent); @@ -593,7 +591,6 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) if (dev->parent) pm_runtime_put(dev->parent); - pm_runtime_put_suppliers(dev); return ret; } diff --git a/drivers/base/platform.c b/drivers/base/platform.c index c0ff1e73a634c3..dff82a3c2caa90 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -572,17 +572,16 @@ static int platform_drv_probe(struct device *_dev) return ret; ret = dev_pm_domain_attach(_dev, true); - if (ret != -EPROBE_DEFER) { - if (drv->probe) { - ret = drv->probe(dev); - if (ret) - dev_pm_domain_detach(_dev, true); - } else { - /* don't fail if just dev_pm_domain_attach failed */ - ret = 0; - } + if (ret) + goto out; + + if (drv->probe) { + ret = drv->probe(dev); + if (ret) + dev_pm_domain_detach(_dev, true); } +out: if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) { dev_warn(_dev, "probe deferral not supported\n"); ret = -ENXIO; diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index f6a9ad52cbbff0..7ae62b6355b8d9 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -98,17 +98,21 @@ EXPORT_SYMBOL_GPL(dev_pm_put_subsys_data); * Callers must ensure proper synchronization of this function with power * management callbacks. * - * Returns 0 on successfully attached PM domain or negative error code. + * Returns 0 on successfully attached PM domain, or when it is found that the + * device doesn't need a PM domain, else a negative error code. */ int dev_pm_domain_attach(struct device *dev, bool power_on) { int ret; + if (dev->pm_domain) + return 0; + ret = acpi_dev_pm_attach(dev, power_on); - if (ret) + if (!ret) ret = genpd_dev_pm_attach(dev); - return ret; + return ret < 0 ? ret : 0; } EXPORT_SYMBOL_GPL(dev_pm_domain_attach); diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 1ea0e2502e8ed4..6f403d6fccb2b3 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -1315,7 +1316,6 @@ EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron); #endif /* CONFIG_PM_SLEEP */ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev, - struct generic_pm_domain *genpd, struct gpd_timing_data *td) { struct generic_pm_domain_data *gpd_data; @@ -1377,24 +1377,19 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td) { struct generic_pm_domain_data *gpd_data; - int ret = 0; + int ret; dev_dbg(dev, "%s()\n", __func__); if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) return -EINVAL; - gpd_data = genpd_alloc_dev_data(dev, genpd, td); + gpd_data = genpd_alloc_dev_data(dev, td); if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); genpd_lock(genpd); - if (genpd->prepared_count > 0) { - ret = -EAGAIN; - goto out; - } - ret = genpd->attach_dev ? genpd->attach_dev(genpd, dev) : 0; if (ret) goto out; @@ -1418,23 +1413,21 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, } /** - * __pm_genpd_add_device - Add a device to an I/O PM domain. + * pm_genpd_add_device - Add a device to an I/O PM domain. * @genpd: PM domain to add the device to. * @dev: Device to be added. - * @td: Set of PM QoS timing parameters to attach to the device. */ -int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, - struct gpd_timing_data *td) +int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) { int ret; mutex_lock(&gpd_list_lock); - ret = genpd_add_device(genpd, dev, td); + ret = genpd_add_device(genpd, dev, NULL); mutex_unlock(&gpd_list_lock); return ret; } -EXPORT_SYMBOL_GPL(__pm_genpd_add_device); +EXPORT_SYMBOL_GPL(pm_genpd_add_device); static int genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev) @@ -1481,13 +1474,13 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, /** * pm_genpd_remove_device - Remove a device from an I/O PM domain. - * @genpd: PM domain to remove the device from. * @dev: Device to be removed. */ -int pm_genpd_remove_device(struct generic_pm_domain *genpd, - struct device *dev) +int pm_genpd_remove_device(struct device *dev) { - if (!genpd || genpd != genpd_lookup_dev(dev)) + struct generic_pm_domain *genpd = genpd_lookup_dev(dev); + + if (!genpd) return -EINVAL; return genpd_remove_device(genpd, dev); @@ -1696,6 +1689,9 @@ int pm_genpd_init(struct generic_pm_domain *genpd, return ret; } + device_initialize(&genpd->dev); + dev_set_name(&genpd->dev, "%s", genpd->name); + mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); @@ -1892,14 +1888,33 @@ int of_genpd_add_provider_simple(struct device_node *np, mutex_lock(&gpd_list_lock); - if (genpd_present(genpd)) { - ret = genpd_add_provider(np, genpd_xlate_simple, genpd); - if (!ret) { - genpd->provider = &np->fwnode; - genpd->has_provider = true; + if (!genpd_present(genpd)) + goto unlock; + + genpd->dev.of_node = np; + + /* Parse genpd OPP table */ + if (genpd->set_performance_state) { + ret = dev_pm_opp_of_add_table(&genpd->dev); + if (ret) { + dev_err(&genpd->dev, "Failed to add OPP table: %d\n", + ret); + goto unlock; } } + ret = genpd_add_provider(np, genpd_xlate_simple, genpd); + if (ret) { + if (genpd->set_performance_state) + dev_pm_opp_of_remove_table(&genpd->dev); + + goto unlock; + } + + genpd->provider = &np->fwnode; + genpd->has_provider = true; + +unlock: mutex_unlock(&gpd_list_lock); return ret; @@ -1914,6 +1929,7 @@ EXPORT_SYMBOL_GPL(of_genpd_add_provider_simple); int of_genpd_add_provider_onecell(struct device_node *np, struct genpd_onecell_data *data) { + struct generic_pm_domain *genpd; unsigned int i; int ret = -EINVAL; @@ -1926,13 +1942,27 @@ int of_genpd_add_provider_onecell(struct device_node *np, data->xlate = genpd_xlate_onecell; for (i = 0; i < data->num_domains; i++) { - if (!data->domains[i]) + genpd = data->domains[i]; + + if (!genpd) continue; - if (!genpd_present(data->domains[i])) + if (!genpd_present(genpd)) goto error; - data->domains[i]->provider = &np->fwnode; - data->domains[i]->has_provider = true; + genpd->dev.of_node = np; + + /* Parse genpd OPP table */ + if (genpd->set_performance_state) { + ret = dev_pm_opp_of_add_table_indexed(&genpd->dev, i); + if (ret) { + dev_err(&genpd->dev, "Failed to add OPP table for index %d: %d\n", + i, ret); + goto error; + } + } + + genpd->provider = &np->fwnode; + genpd->has_provider = true; } ret = genpd_add_provider(np, data->xlate, data); @@ -1945,10 +1975,16 @@ int of_genpd_add_provider_onecell(struct device_node *np, error: while (i--) { - if (!data->domains[i]) + genpd = data->domains[i]; + + if (!genpd) continue; - data->domains[i]->provider = NULL; - data->domains[i]->has_provider = false; + + genpd->provider = NULL; + genpd->has_provider = false; + + if (genpd->set_performance_state) + dev_pm_opp_of_remove_table(&genpd->dev); } mutex_unlock(&gpd_list_lock); @@ -1975,10 +2011,17 @@ void of_genpd_del_provider(struct device_node *np) * provider, set the 'has_provider' to false * so that the PM domain can be safely removed. */ - list_for_each_entry(gpd, &gpd_list, gpd_list_node) - if (gpd->provider == &np->fwnode) + list_for_each_entry(gpd, &gpd_list, gpd_list_node) { + if (gpd->provider == &np->fwnode) { gpd->has_provider = false; + if (!gpd->set_performance_state) + continue; + + dev_pm_opp_of_remove_table(&gpd->dev); + } + } + list_del(&cp->link); of_node_put(cp->node); kfree(cp); @@ -2185,31 +2228,25 @@ static void genpd_dev_pm_sync(struct device *dev) * Parse device's OF node to find a PM domain specifier. If such is found, * attaches the device to retrieved pm_domain ops. * - * Both generic and legacy Samsung-specific DT bindings are supported to keep - * backwards compatibility with existing DTBs. - * - * Returns 0 on successfully attached PM domain or negative error code. Note - * that if a power-domain exists for the device, but it cannot be found or - * turned on, then return -EPROBE_DEFER to ensure that the device is not - * probed and to re-try again later. + * Returns 1 on successfully attached PM domain, 0 when the device don't need a + * PM domain or a negative error code in case of failures. Note that if a + * power-domain exists for the device, but it cannot be found or turned on, + * then return -EPROBE_DEFER to ensure that the device is not probed and to + * re-try again later. */ int genpd_dev_pm_attach(struct device *dev) { struct of_phandle_args pd_args; struct generic_pm_domain *pd; - unsigned int i; int ret; if (!dev->of_node) - return -ENODEV; - - if (dev->pm_domain) - return -EEXIST; + return 0; ret = of_parse_phandle_with_args(dev->of_node, "power-domains", "#power-domain-cells", 0, &pd_args); if (ret < 0) - return ret; + return 0; mutex_lock(&gpd_list_lock); pd = genpd_get_from_provider(&pd_args); @@ -2223,21 +2260,14 @@ int genpd_dev_pm_attach(struct device *dev) dev_dbg(dev, "adding to PM domain %s\n", pd->name); - for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) { - ret = genpd_add_device(pd, dev, NULL); - if (ret != -EAGAIN) - break; - - mdelay(i); - cond_resched(); - } + ret = genpd_add_device(pd, dev, NULL); mutex_unlock(&gpd_list_lock); if (ret < 0) { if (ret != -EPROBE_DEFER) dev_err(dev, "failed to add to PM domain %s: %d", pd->name, ret); - goto out; + return ret; } dev->pm_domain->detach = genpd_dev_pm_detach; @@ -2246,8 +2276,11 @@ int genpd_dev_pm_attach(struct device *dev) genpd_lock(pd); ret = genpd_power_on(pd, 0); genpd_unlock(pd); -out: - return ret ? -EPROBE_DEFER : 0; + + if (ret) + genpd_remove_device(pd, dev); + + return ret ? -EPROBE_DEFER : 1; } EXPORT_SYMBOL_GPL(genpd_dev_pm_attach); @@ -2361,6 +2394,55 @@ int of_genpd_parse_idle_states(struct device_node *dn, } EXPORT_SYMBOL_GPL(of_genpd_parse_idle_states); +/** + * of_genpd_opp_to_performance_state- Gets performance state of device's + * power domain corresponding to a DT node's "required-opps" property. + * + * @dev: Device for which the performance-state needs to be found. + * @opp_node: DT node where the "required-opps" property is present. This can be + * the device node itself (if it doesn't have an OPP table) or a node + * within the OPP table of a device (if device has an OPP table). + * @state: Pointer to return performance state. + * + * Returns performance state corresponding to the "required-opps" property of + * a DT node. This calls platform specific genpd->opp_to_performance_state() + * callback to translate power domain OPP to performance state. + * + * Returns performance state on success and 0 on failure. + */ +unsigned int of_genpd_opp_to_performance_state(struct device *dev, + struct device_node *opp_node) +{ + struct generic_pm_domain *genpd; + struct dev_pm_opp *opp; + int state = 0; + + genpd = dev_to_genpd(dev); + if (IS_ERR(genpd)) + return 0; + + if (unlikely(!genpd->set_performance_state)) + return 0; + + genpd_lock(genpd); + + opp = of_dev_pm_opp_find_required_opp(&genpd->dev, opp_node); + if (IS_ERR(opp)) { + dev_err(dev, "Failed to find required OPP: %ld\n", + PTR_ERR(opp)); + goto unlock; + } + + state = genpd->opp_to_performance_state(genpd, opp); + dev_pm_opp_put(opp); + +unlock: + genpd_unlock(genpd); + + return state; +} +EXPORT_SYMBOL_GPL(of_genpd_opp_to_performance_state); + #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ @@ -2628,6 +2710,19 @@ static int genpd_devices_show(struct seq_file *s, void *data) return ret; } +static int genpd_perf_state_show(struct seq_file *s, void *data) +{ + struct generic_pm_domain *genpd = s->private; + + if (genpd_lock_interruptible(genpd)) + return -ERESTARTSYS; + + seq_printf(s, "%u\n", genpd->performance_state); + + genpd_unlock(genpd); + return 0; +} + #define define_genpd_open_function(name) \ static int genpd_##name##_open(struct inode *inode, struct file *file) \ { \ @@ -2641,6 +2736,7 @@ define_genpd_open_function(idle_states); define_genpd_open_function(active_time); define_genpd_open_function(total_idle_time); define_genpd_open_function(devices); +define_genpd_open_function(perf_state); #define define_genpd_debugfs_fops(name) \ static const struct file_operations genpd_##name##_fops = { \ @@ -2657,6 +2753,7 @@ define_genpd_debugfs_fops(idle_states); define_genpd_debugfs_fops(active_time); define_genpd_debugfs_fops(total_idle_time); define_genpd_debugfs_fops(devices); +define_genpd_debugfs_fops(perf_state); static int __init genpd_debug_init(void) { @@ -2690,6 +2787,9 @@ static int __init genpd_debug_init(void) d, genpd, &genpd_total_idle_time_fops); debugfs_create_file("devices", 0444, d, genpd, &genpd_devices_fops); + if (genpd->set_performance_state) + debugfs_create_file("perf_state", 0444, + d, genpd, &genpd_perf_state_fops); } return 0; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index e5e067091572e0..3f68e2919dc5da 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -192,34 +192,31 @@ void device_pm_move_last(struct device *dev) list_move_tail(&dev->power.entry, &dpm_list); } -static ktime_t initcall_debug_start(struct device *dev) +static ktime_t initcall_debug_start(struct device *dev, void *cb) { - ktime_t calltime = 0; - - if (pm_print_times_enabled) { - pr_info("calling %s+ @ %i, parent: %s\n", - dev_name(dev), task_pid_nr(current), - dev->parent ? dev_name(dev->parent) : "none"); - calltime = ktime_get(); - } + if (!pm_print_times_enabled) + return 0; - return calltime; + dev_info(dev, "calling %pF @ %i, parent: %s\n", cb, + task_pid_nr(current), + dev->parent ? dev_name(dev->parent) : "none"); + return ktime_get(); } static void initcall_debug_report(struct device *dev, ktime_t calltime, - int error, pm_message_t state, - const char *info) + void *cb, int error) { ktime_t rettime; s64 nsecs; + if (!pm_print_times_enabled) + return; + rettime = ktime_get(); nsecs = (s64) ktime_to_ns(ktime_sub(rettime, calltime)); - if (pm_print_times_enabled) { - pr_info("call %s+ returned %d after %Ld usecs\n", dev_name(dev), - error, (unsigned long long)nsecs >> 10); - } + dev_info(dev, "%pF returned %d after %Ld usecs\n", cb, error, + (unsigned long long)nsecs >> 10); } /** @@ -446,7 +443,7 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev, if (!cb) return 0; - calltime = initcall_debug_start(dev); + calltime = initcall_debug_start(dev, cb); pm_dev_dbg(dev, state, info); trace_device_pm_callback_start(dev, info, state.event); @@ -454,7 +451,7 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev, trace_device_pm_callback_end(dev, error); suspend_report_result(cb, error); - initcall_debug_report(dev, calltime, error, state, info); + initcall_debug_report(dev, calltime, cb, error); return error; } @@ -1664,14 +1661,14 @@ static int legacy_suspend(struct device *dev, pm_message_t state, int error; ktime_t calltime; - calltime = initcall_debug_start(dev); + calltime = initcall_debug_start(dev, cb); trace_device_pm_callback_start(dev, info, state.event); error = cb(dev, state); trace_device_pm_callback_end(dev, error); suspend_report_result(cb, error); - initcall_debug_report(dev, calltime, error, state, info); + initcall_debug_report(dev, calltime, cb, error); return error; } diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 86e67e70b509db..c511def48b4862 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -56,14 +56,6 @@ static inline void device_wakeup_detach_irq(struct device *dev) { } -static inline void device_wakeup_arm_wake_irqs(void) -{ -} - -static inline void device_wakeup_disarm_wake_irqs(void) -{ -} - #endif /* CONFIG_PM_SLEEP */ /* @@ -95,28 +87,6 @@ static inline void pm_runtime_remove(struct device *dev) {} static inline int dpm_sysfs_add(struct device *dev) { return 0; } static inline void dpm_sysfs_remove(struct device *dev) {} -static inline void rpm_sysfs_remove(struct device *dev) {} -static inline int wakeup_sysfs_add(struct device *dev) { return 0; } -static inline void wakeup_sysfs_remove(struct device *dev) {} -static inline int pm_qos_sysfs_add(struct device *dev) { return 0; } -static inline void pm_qos_sysfs_remove(struct device *dev) {} - -static inline void dev_pm_arm_wake_irq(struct wake_irq *wirq) -{ -} - -static inline void dev_pm_disarm_wake_irq(struct wake_irq *wirq) -{ -} - -static inline void dev_pm_enable_wake_irq_check(struct device *dev, - bool can_change_status) -{ -} - -static inline void dev_pm_disable_wake_irq_check(struct device *dev) -{ -} #endif diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 8bef3cb2424da7..c6030f100c087f 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1563,37 +1563,16 @@ void pm_runtime_clean_up_links(struct device *dev) } /** - * pm_runtime_get_suppliers - Resume and reference-count supplier devices. + * pm_runtime_resume_suppliers - Resume supplier devices. * @dev: Consumer device. */ -void pm_runtime_get_suppliers(struct device *dev) +void pm_runtime_resume_suppliers(struct device *dev) { - struct device_link *link; - int idx; - - idx = device_links_read_lock(); - - list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) - if (link->flags & DL_FLAG_PM_RUNTIME) - pm_runtime_get_sync(link->supplier); - - device_links_read_unlock(idx); -} - -/** - * pm_runtime_put_suppliers - Drop references to supplier devices. - * @dev: Consumer device. - */ -void pm_runtime_put_suppliers(struct device *dev) -{ - struct device_link *link; int idx; idx = device_links_read_lock(); - list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) - if (link->flags & DL_FLAG_PM_RUNTIME) - pm_runtime_put(link->supplier); + rpm_get_suppliers(dev); device_links_read_unlock(idx); } @@ -1607,6 +1586,8 @@ void pm_runtime_new_link(struct device *dev) void pm_runtime_drop_link(struct device *dev) { + rpm_put_suppliers(dev); + spin_lock_irq(&dev->power.lock); WARN_ON(dev->power.links_count == 0); dev->power.links_count--; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index ea01621ed769ab..5fa1898755a348 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -57,7 +57,7 @@ static void split_counters(unsigned int *cnt, unsigned int *inpr) /* A preserved old value of the events counter. */ static unsigned int saved_count; -static DEFINE_SPINLOCK(events_lock); +static DEFINE_RAW_SPINLOCK(events_lock); static void pm_wakeup_timer_fn(struct timer_list *t); @@ -183,11 +183,10 @@ void wakeup_source_add(struct wakeup_source *ws) spin_lock_init(&ws->lock); timer_setup(&ws->timer, pm_wakeup_timer_fn, 0); ws->active = false; - ws->last_time = ktime_get(); - spin_lock_irqsave(&events_lock, flags); + raw_spin_lock_irqsave(&events_lock, flags); list_add_rcu(&ws->entry, &wakeup_sources); - spin_unlock_irqrestore(&events_lock, flags); + raw_spin_unlock_irqrestore(&events_lock, flags); } EXPORT_SYMBOL_GPL(wakeup_source_add); @@ -202,9 +201,9 @@ void wakeup_source_remove(struct wakeup_source *ws) if (WARN_ON(!ws)) return; - spin_lock_irqsave(&events_lock, flags); + raw_spin_lock_irqsave(&events_lock, flags); list_del_rcu(&ws->entry); - spin_unlock_irqrestore(&events_lock, flags); + raw_spin_unlock_irqrestore(&events_lock, flags); synchronize_srcu(&wakeup_srcu); } EXPORT_SYMBOL_GPL(wakeup_source_remove); @@ -843,7 +842,7 @@ bool pm_wakeup_pending(void) unsigned long flags; bool ret = false; - spin_lock_irqsave(&events_lock, flags); + raw_spin_lock_irqsave(&events_lock, flags); if (events_check_enabled) { unsigned int cnt, inpr; @@ -851,10 +850,10 @@ bool pm_wakeup_pending(void) ret = (cnt != saved_count || inpr > 0); events_check_enabled = !ret; } - spin_unlock_irqrestore(&events_lock, flags); + raw_spin_unlock_irqrestore(&events_lock, flags); if (ret) { - pr_info("PM: Wakeup pending, aborting suspend\n"); + pr_debug("PM: Wakeup pending, aborting suspend\n"); pm_print_active_wakeup_sources(); } @@ -940,13 +939,13 @@ bool pm_save_wakeup_count(unsigned int count) unsigned long flags; events_check_enabled = false; - spin_lock_irqsave(&events_lock, flags); + raw_spin_lock_irqsave(&events_lock, flags); split_counters(&cnt, &inpr); if (cnt == count && inpr == 0) { saved_count = count; events_check_enabled = true; } - spin_unlock_irqrestore(&events_lock, flags); + raw_spin_unlock_irqrestore(&events_lock, flags); return events_check_enabled; } @@ -1029,32 +1028,75 @@ static int print_wakeup_source_stats(struct seq_file *m, return 0; } -/** - * wakeup_sources_stats_show - Print wakeup sources statistics information. - * @m: seq_file to print the statistics into. - */ -static int wakeup_sources_stats_show(struct seq_file *m, void *unused) +static void *wakeup_sources_stats_seq_start(struct seq_file *m, + loff_t *pos) { struct wakeup_source *ws; - int srcuidx; + loff_t n = *pos; + int *srcuidx = m->private; - seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" - "expire_count\tactive_since\ttotal_time\tmax_time\t" - "last_change\tprevent_suspend_time\n"); + if (n == 0) { + seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" + "expire_count\tactive_since\ttotal_time\tmax_time\t" + "last_change\tprevent_suspend_time\n"); + } - srcuidx = srcu_read_lock(&wakeup_srcu); - list_for_each_entry_rcu(ws, &wakeup_sources, entry) - print_wakeup_source_stats(m, ws); - srcu_read_unlock(&wakeup_srcu, srcuidx); + *srcuidx = srcu_read_lock(&wakeup_srcu); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + if (n-- <= 0) + return ws; + } + + return NULL; +} + +static void *wakeup_sources_stats_seq_next(struct seq_file *m, + void *v, loff_t *pos) +{ + struct wakeup_source *ws = v; + struct wakeup_source *next_ws = NULL; - print_wakeup_source_stats(m, &deleted_ws); + ++(*pos); + + list_for_each_entry_continue_rcu(ws, &wakeup_sources, entry) { + next_ws = ws; + break; + } + + return next_ws; +} + +static void wakeup_sources_stats_seq_stop(struct seq_file *m, void *v) +{ + int *srcuidx = m->private; + + srcu_read_unlock(&wakeup_srcu, *srcuidx); +} + +/** + * wakeup_sources_stats_seq_show - Print wakeup sources statistics information. + * @m: seq_file to print the statistics into. + * @v: wakeup_source of each iteration + */ +static int wakeup_sources_stats_seq_show(struct seq_file *m, void *v) +{ + struct wakeup_source *ws = v; + + print_wakeup_source_stats(m, ws); return 0; } +static const struct seq_operations wakeup_sources_stats_seq_ops = { + .start = wakeup_sources_stats_seq_start, + .next = wakeup_sources_stats_seq_next, + .stop = wakeup_sources_stats_seq_stop, + .show = wakeup_sources_stats_seq_show, +}; + static int wakeup_sources_stats_open(struct inode *inode, struct file *file) { - return single_open(file, wakeup_sources_stats_show, NULL); + return seq_open_private(file, &wakeup_sources_stats_seq_ops, sizeof(int)); } static const struct file_operations wakeup_sources_stats_fops = { @@ -1062,7 +1104,7 @@ static const struct file_operations wakeup_sources_stats_fops = { .open = wakeup_sources_stats_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = seq_release_private, }; static int __init wakeup_sources_debugfs_init(void) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 96b35b8b36060f..c7ce928fbf1f55 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -124,6 +124,17 @@ config ARM_OMAP2PLUS_CPUFREQ depends on ARCH_OMAP2PLUS default ARCH_OMAP2PLUS +config ARM_QCOM_CPUFREQ_KRYO + bool "Qualcomm Kryo based CPUFreq" + depends on ARM64 + depends on QCOM_QFPROM + depends on QCOM_SMEM + select PM_OPP + help + This adds the CPUFreq driver for Qualcomm Kryo SoC based boards. + + If in doubt, say N. + config ARM_S3C_CPUFREQ bool help @@ -264,7 +275,7 @@ config ARM_TANGO_CPUFREQ default y config ARM_TEGRA20_CPUFREQ - bool "Tegra20 CPUFreq support" + tristate "Tegra20 CPUFreq support" depends on ARCH_TEGRA default y help diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 8d24ade3bd0209..fb4a2ecac43bf5 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_MACH_MVEBU_V7) += mvebu-cpufreq.o obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o obj-$(CONFIG_ARM_PXA2xx_CPUFREQ) += pxa2xx-cpufreq.o obj-$(CONFIG_PXA3xx) += pxa3xx-cpufreq.o +obj-$(CONFIG_ARM_QCOM_CPUFREQ_KRYO) += qcom-cpufreq-kryo.o obj-$(CONFIG_ARM_S3C2410_CPUFREQ) += s3c2410-cpufreq.o obj-$(CONFIG_ARM_S3C2412_CPUFREQ) += s3c2412-cpufreq.o obj-$(CONFIG_ARM_S3C2416_CPUFREQ) += s3c2416-cpufreq.o diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index 72a2975499dbaa..739da90ff3f654 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -23,6 +23,8 @@ #include #include +#include "cpufreq-dt.h" + /* Power management in North Bridge register set */ #define ARMADA_37XX_NB_L0L1 0x18 #define ARMADA_37XX_NB_L2L3 0x1C @@ -56,6 +58,16 @@ */ #define LOAD_LEVEL_NR 4 +struct armada37xx_cpufreq_state { + struct regmap *regmap; + u32 nb_l0l1; + u32 nb_l2l3; + u32 nb_dyn_mod; + u32 nb_cpu_load; +}; + +static struct armada37xx_cpufreq_state *armada37xx_cpufreq_state; + struct armada_37xx_dvfs { u32 cpu_freq_max; u8 divider[LOAD_LEVEL_NR]; @@ -136,7 +148,7 @@ static void __init armada37xx_cpufreq_dvfs_setup(struct regmap *base, clk_set_parent(clk, parent); } -static void __init armada37xx_cpufreq_disable_dvfs(struct regmap *base) +static void armada37xx_cpufreq_disable_dvfs(struct regmap *base) { unsigned int reg = ARMADA_37XX_NB_DYN_MOD, mask = ARMADA_37XX_NB_DFS_EN; @@ -162,10 +174,47 @@ static void __init armada37xx_cpufreq_enable_dvfs(struct regmap *base) regmap_update_bits(base, reg, mask, mask); } +static int armada37xx_cpufreq_suspend(struct cpufreq_policy *policy) +{ + struct armada37xx_cpufreq_state *state = armada37xx_cpufreq_state; + + regmap_read(state->regmap, ARMADA_37XX_NB_L0L1, &state->nb_l0l1); + regmap_read(state->regmap, ARMADA_37XX_NB_L2L3, &state->nb_l2l3); + regmap_read(state->regmap, ARMADA_37XX_NB_CPU_LOAD, + &state->nb_cpu_load); + regmap_read(state->regmap, ARMADA_37XX_NB_DYN_MOD, &state->nb_dyn_mod); + + return 0; +} + +static int armada37xx_cpufreq_resume(struct cpufreq_policy *policy) +{ + struct armada37xx_cpufreq_state *state = armada37xx_cpufreq_state; + + /* Ensure DVFS is disabled otherwise the following registers are RO */ + armada37xx_cpufreq_disable_dvfs(state->regmap); + + regmap_write(state->regmap, ARMADA_37XX_NB_L0L1, state->nb_l0l1); + regmap_write(state->regmap, ARMADA_37XX_NB_L2L3, state->nb_l2l3); + regmap_write(state->regmap, ARMADA_37XX_NB_CPU_LOAD, + state->nb_cpu_load); + + /* + * NB_DYN_MOD register is the one that actually enable back DVFS if it + * was enabled before the suspend operation. This must be done last + * otherwise other registers are not writable. + */ + regmap_write(state->regmap, ARMADA_37XX_NB_DYN_MOD, state->nb_dyn_mod); + + return 0; +} + static int __init armada37xx_cpufreq_driver_init(void) { + struct cpufreq_dt_platform_data pdata; struct armada_37xx_dvfs *dvfs; struct platform_device *pdev; + unsigned long freq; unsigned int cur_frequency; struct regmap *nb_pm_base; struct device *cpu_dev; @@ -207,33 +256,58 @@ static int __init armada37xx_cpufreq_driver_init(void) } dvfs = armada_37xx_cpu_freq_info_get(cur_frequency); - if (!dvfs) + if (!dvfs) { + clk_put(clk); return -EINVAL; + } + + armada37xx_cpufreq_state = kmalloc(sizeof(*armada37xx_cpufreq_state), + GFP_KERNEL); + if (!armada37xx_cpufreq_state) { + clk_put(clk); + return -ENOMEM; + } + + armada37xx_cpufreq_state->regmap = nb_pm_base; armada37xx_cpufreq_dvfs_setup(nb_pm_base, clk, dvfs->divider); clk_put(clk); for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR; load_lvl++) { - unsigned long freq = cur_frequency / dvfs->divider[load_lvl]; + freq = cur_frequency / dvfs->divider[load_lvl]; ret = dev_pm_opp_add(cpu_dev, freq, 0); - if (ret) { - /* clean-up the already added opp before leaving */ - while (load_lvl-- > ARMADA_37XX_DVFS_LOAD_0) { - freq = cur_frequency / dvfs->divider[load_lvl]; - dev_pm_opp_remove(cpu_dev, freq); - } - return ret; - } + if (ret) + goto remove_opp; } /* Now that everything is setup, enable the DVFS at hardware level */ armada37xx_cpufreq_enable_dvfs(nb_pm_base); - pdev = platform_device_register_simple("cpufreq-dt", -1, NULL, 0); + pdata.suspend = armada37xx_cpufreq_suspend; + pdata.resume = armada37xx_cpufreq_resume; + + pdev = platform_device_register_data(NULL, "cpufreq-dt", -1, &pdata, + sizeof(pdata)); + ret = PTR_ERR_OR_ZERO(pdev); + if (ret) + goto disable_dvfs; + + return 0; + +disable_dvfs: + armada37xx_cpufreq_disable_dvfs(nb_pm_base); +remove_opp: + /* clean-up the already added opp before leaving */ + while (load_lvl-- > ARMADA_37XX_DVFS_LOAD_0) { + freq = cur_frequency / dvfs->divider[load_lvl]; + dev_pm_opp_remove(cpu_dev, freq); + } + + kfree(armada37xx_cpufreq_state); - return PTR_ERR_OR_ZERO(pdev); + return ret; } /* late_initcall, to guarantee the driver is loaded after A37xx clock driver */ late_initcall(armada37xx_cpufreq_driver_init); diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 3b585e4bfac513..fe14c57de6cabc 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -66,8 +66,6 @@ static const struct of_device_id whitelist[] __initconst = { { .compatible = "renesas,r8a7792", }, { .compatible = "renesas,r8a7793", }, { .compatible = "renesas,r8a7794", }, - { .compatible = "renesas,r8a7795", }, - { .compatible = "renesas,r8a7796", }, { .compatible = "renesas,sh73a0", }, { .compatible = "rockchip,rk2928", }, @@ -118,6 +116,9 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "nvidia,tegra124", }, + { .compatible = "qcom,apq8096", }, + { .compatible = "qcom,msm8996", }, + { .compatible = "st,stih407", }, { .compatible = "st,stih410", }, diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 190ea0dccb7925..0a9ebf00be468e 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -346,8 +346,14 @@ static int dt_cpufreq_probe(struct platform_device *pdev) if (ret) return ret; - if (data && data->have_governor_per_policy) - dt_cpufreq_driver.flags |= CPUFREQ_HAVE_GOVERNOR_PER_POLICY; + if (data) { + if (data->have_governor_per_policy) + dt_cpufreq_driver.flags |= CPUFREQ_HAVE_GOVERNOR_PER_POLICY; + + dt_cpufreq_driver.resume = data->resume; + if (data->suspend) + dt_cpufreq_driver.suspend = data->suspend; + } ret = cpufreq_register_driver(&dt_cpufreq_driver); if (ret) diff --git a/drivers/cpufreq/cpufreq-dt.h b/drivers/cpufreq/cpufreq-dt.h index 54d774e46c435e..d5aeea13433e99 100644 --- a/drivers/cpufreq/cpufreq-dt.h +++ b/drivers/cpufreq/cpufreq-dt.h @@ -12,8 +12,13 @@ #include +struct cpufreq_policy; + struct cpufreq_dt_platform_data { bool have_governor_per_policy; + + int (*suspend)(struct cpufreq_policy *policy); + int (*resume)(struct cpufreq_policy *policy); }; #endif /* __CPUFREQ_DT_H__ */ diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 075d18f6ba7ab5..b0dfd3222013d5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -89,16 +89,7 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy); * The mutex locks both lists. */ static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list); -static struct srcu_notifier_head cpufreq_transition_notifier_list; - -static bool init_cpufreq_transition_notifier_list_called; -static int __init init_cpufreq_transition_notifier_list(void) -{ - srcu_init_notifier_head(&cpufreq_transition_notifier_list); - init_cpufreq_transition_notifier_list_called = true; - return 0; -} -pure_initcall(init_cpufreq_transition_notifier_list); +SRCU_NOTIFIER_HEAD_STATIC(cpufreq_transition_notifier_list); static int off __read_mostly; static int cpufreq_disabled(void) @@ -300,8 +291,19 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) #endif } -static void __cpufreq_notify_transition(struct cpufreq_policy *policy, - struct cpufreq_freqs *freqs, unsigned int state) +/** + * cpufreq_notify_transition - Notify frequency transition and adjust_jiffies. + * @policy: cpufreq policy to enable fast frequency switching for. + * @freqs: contain details of the frequency update. + * @state: set to CPUFREQ_PRECHANGE or CPUFREQ_POSTCHANGE. + * + * This function calls the transition notifiers and the "adjust_jiffies" + * function. It is called twice on all CPU frequency changes that have + * external effects. + */ +static void cpufreq_notify_transition(struct cpufreq_policy *policy, + struct cpufreq_freqs *freqs, + unsigned int state) { BUG_ON(irqs_disabled()); @@ -313,54 +315,44 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy, state, freqs->new); switch (state) { - case CPUFREQ_PRECHANGE: - /* detect if the driver reported a value as "old frequency" + /* + * Detect if the driver reported a value as "old frequency" * which is not equal to what the cpufreq core thinks is * "old frequency". */ if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { - if ((policy) && (policy->cpu == freqs->cpu) && - (policy->cur) && (policy->cur != freqs->old)) { + if (policy->cur && (policy->cur != freqs->old)) { pr_debug("Warning: CPU frequency is %u, cpufreq assumed %u kHz\n", freqs->old, policy->cur); freqs->old = policy->cur; } } - srcu_notifier_call_chain(&cpufreq_transition_notifier_list, - CPUFREQ_PRECHANGE, freqs); + + for_each_cpu(freqs->cpu, policy->cpus) { + srcu_notifier_call_chain(&cpufreq_transition_notifier_list, + CPUFREQ_PRECHANGE, freqs); + } + adjust_jiffies(CPUFREQ_PRECHANGE, freqs); break; case CPUFREQ_POSTCHANGE: adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); - pr_debug("FREQ: %lu - CPU: %lu\n", - (unsigned long)freqs->new, (unsigned long)freqs->cpu); - trace_cpu_frequency(freqs->new, freqs->cpu); + pr_debug("FREQ: %u - CPUs: %*pbl\n", freqs->new, + cpumask_pr_args(policy->cpus)); + + for_each_cpu(freqs->cpu, policy->cpus) { + trace_cpu_frequency(freqs->new, freqs->cpu); + srcu_notifier_call_chain(&cpufreq_transition_notifier_list, + CPUFREQ_POSTCHANGE, freqs); + } + cpufreq_stats_record_transition(policy, freqs->new); - srcu_notifier_call_chain(&cpufreq_transition_notifier_list, - CPUFREQ_POSTCHANGE, freqs); - if (likely(policy) && likely(policy->cpu == freqs->cpu)) - policy->cur = freqs->new; - break; + policy->cur = freqs->new; } } -/** - * cpufreq_notify_transition - call notifier chain and adjust_jiffies - * on frequency transition. - * - * This function calls the transition notifiers and the "adjust_jiffies" - * function. It is called twice on all CPU frequency changes that have - * external effects. - */ -static void cpufreq_notify_transition(struct cpufreq_policy *policy, - struct cpufreq_freqs *freqs, unsigned int state) -{ - for_each_cpu(freqs->cpu, policy->cpus) - __cpufreq_notify_transition(policy, freqs, state); -} - /* Do post notifications when there are chances that transition has failed */ static void cpufreq_notify_post_transition(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, int transition_failed) @@ -696,6 +688,8 @@ static ssize_t store_##file_name \ struct cpufreq_policy new_policy; \ \ memcpy(&new_policy, policy, sizeof(*policy)); \ + new_policy.min = policy->user_policy.min; \ + new_policy.max = policy->user_policy.max; \ \ ret = sscanf(buf, "%u", &new_policy.object); \ if (ret != 1) \ @@ -1764,8 +1758,6 @@ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) if (cpufreq_disabled()) return -EINVAL; - WARN_ON(!init_cpufreq_transition_notifier_list_called); - switch (list) { case CPUFREQ_TRANSITION_NOTIFIER: mutex_lock(&cpufreq_fast_switch_lock); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index ca38229b045ab2..871bf9cf55cf0d 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -278,7 +278,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; u64 delta_ns, lst; - if (!cpufreq_can_do_remote_dvfs(policy_dbs->policy)) + if (!cpufreq_this_cpu_can_update(policy_dbs->policy)) return; /* diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 17e566afbb41c3..08960a55eb27a4 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1939,13 +1939,51 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) return 0; } +/* Use of trace in passive mode: + * + * In passive mode the trace core_busy field (also known as the + * performance field, and lablelled as such on the graphs; also known as + * core_avg_perf) is not needed and so is re-assigned to indicate if the + * driver call was via the normal or fast switch path. Various graphs + * output from the intel_pstate_tracer.py utility that include core_busy + * (or performance or core_avg_perf) have a fixed y-axis from 0 to 100%, + * so we use 10 to indicate the the normal path through the driver, and + * 90 to indicate the fast switch path through the driver. + * The scaled_busy field is not used, and is set to 0. + */ + +#define INTEL_PSTATE_TRACE_TARGET 10 +#define INTEL_PSTATE_TRACE_FAST_SWITCH 90 + +static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, int old_pstate) +{ + struct sample *sample; + + if (!trace_pstate_sample_enabled()) + return; + + if (!intel_pstate_sample(cpu, ktime_get())) + return; + + sample = &cpu->sample; + trace_pstate_sample(trace_type, + 0, + old_pstate, + cpu->pstate.current_pstate, + sample->mperf, + sample->aperf, + sample->tsc, + get_avg_frequency(cpu), + fp_toint(cpu->iowait_boost * 100)); +} + static int intel_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { struct cpudata *cpu = all_cpu_data[policy->cpu]; struct cpufreq_freqs freqs; - int target_pstate; + int target_pstate, old_pstate; update_turbo_state(); @@ -1965,12 +2003,14 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, break; } target_pstate = intel_pstate_prepare_request(cpu, target_pstate); + old_pstate = cpu->pstate.current_pstate; if (target_pstate != cpu->pstate.current_pstate) { cpu->pstate.current_pstate = target_pstate; wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, target_pstate)); } freqs.new = target_pstate * cpu->pstate.scaling; + intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_TARGET, old_pstate); cpufreq_freq_transition_end(policy, &freqs, false); return 0; @@ -1980,13 +2020,15 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { struct cpudata *cpu = all_cpu_data[policy->cpu]; - int target_pstate; + int target_pstate, old_pstate; update_turbo_state(); target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); target_pstate = intel_pstate_prepare_request(cpu, target_pstate); + old_pstate = cpu->pstate.current_pstate; intel_pstate_update_pstate(cpu, target_pstate); + intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate); return target_pstate * cpu->pstate.scaling; } diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c b/drivers/cpufreq/qcom-cpufreq-kryo.c new file mode 100644 index 00000000000000..d049fe4b80c48e --- /dev/null +++ b/drivers/cpufreq/qcom-cpufreq-kryo.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. + */ + +/* + * In Certain QCOM SoCs like apq8096 and msm8996 that have KRYO processors, + * the CPU frequency subset and voltage value of each OPP varies + * based on the silicon variant in use. Qualcomm Process Voltage Scaling Tables + * defines the voltage and frequency value based on the msm-id in SMEM + * and speedbin blown in the efuse combination. + * The qcom-cpufreq-kryo driver reads the msm-id and efuse value from the SoC + * to provide the OPP framework with required information. + * This is used to determine the voltage and frequency value for each OPP of + * operating-points-v2 table when it is parsed by the OPP framework. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MSM_ID_SMEM 137 + +enum _msm_id { + MSM8996V3 = 0xF6ul, + APQ8096V3 = 0x123ul, + MSM8996SG = 0x131ul, + APQ8096SG = 0x138ul, +}; + +enum _msm8996_version { + MSM8996_V3, + MSM8996_SG, + NUM_OF_MSM8996_VERSIONS, +}; + +static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void) +{ + size_t len; + u32 *msm_id; + enum _msm8996_version version; + + msm_id = qcom_smem_get(QCOM_SMEM_HOST_ANY, MSM_ID_SMEM, &len); + if (IS_ERR(msm_id)) + return NUM_OF_MSM8996_VERSIONS; + + /* The first 4 bytes are format, next to them is the actual msm-id */ + msm_id++; + + switch ((enum _msm_id)*msm_id) { + case MSM8996V3: + case APQ8096V3: + version = MSM8996_V3; + break; + case MSM8996SG: + case APQ8096SG: + version = MSM8996_SG; + break; + default: + version = NUM_OF_MSM8996_VERSIONS; + } + + return version; +} + +static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) +{ + struct opp_table *opp_tables[NR_CPUS] = {0}; + struct platform_device *cpufreq_dt_pdev; + enum _msm8996_version msm8996_version; + struct nvmem_cell *speedbin_nvmem; + struct device_node *np; + struct device *cpu_dev; + unsigned cpu; + u8 *speedbin; + u32 versions; + size_t len; + int ret; + + cpu_dev = get_cpu_device(0); + if (NULL == cpu_dev) + ret = -ENODEV; + + msm8996_version = qcom_cpufreq_kryo_get_msm_id(); + if (NUM_OF_MSM8996_VERSIONS == msm8996_version) { + dev_err(cpu_dev, "Not Snapdragon 820/821!"); + return -ENODEV; + } + + np = dev_pm_opp_of_get_opp_desc_node(cpu_dev); + if (IS_ERR(np)) + return PTR_ERR(np); + + ret = of_device_is_compatible(np, "operating-points-v2-kryo-cpu"); + if (!ret) { + of_node_put(np); + return -ENOENT; + } + + speedbin_nvmem = of_nvmem_cell_get(np, NULL); + of_node_put(np); + if (IS_ERR(speedbin_nvmem)) { + dev_err(cpu_dev, "Could not get nvmem cell: %ld\n", + PTR_ERR(speedbin_nvmem)); + return PTR_ERR(speedbin_nvmem); + } + + speedbin = nvmem_cell_read(speedbin_nvmem, &len); + nvmem_cell_put(speedbin_nvmem); + + switch (msm8996_version) { + case MSM8996_V3: + versions = 1 << (unsigned int)(*speedbin); + break; + case MSM8996_SG: + versions = 1 << ((unsigned int)(*speedbin) + 4); + break; + default: + BUG(); + break; + } + + for_each_possible_cpu(cpu) { + cpu_dev = get_cpu_device(cpu); + if (NULL == cpu_dev) { + ret = -ENODEV; + goto free_opp; + } + + opp_tables[cpu] = dev_pm_opp_set_supported_hw(cpu_dev, + &versions, 1); + if (IS_ERR(opp_tables[cpu])) { + ret = PTR_ERR(opp_tables[cpu]); + dev_err(cpu_dev, "Failed to set supported hardware\n"); + goto free_opp; + } + } + + cpufreq_dt_pdev = platform_device_register_simple("cpufreq-dt", -1, + NULL, 0); + if (!IS_ERR(cpufreq_dt_pdev)) + return 0; + + ret = PTR_ERR(cpufreq_dt_pdev); + dev_err(cpu_dev, "Failed to register platform device\n"); + +free_opp: + for_each_possible_cpu(cpu) { + if (IS_ERR_OR_NULL(opp_tables[cpu])) + break; + dev_pm_opp_put_supported_hw(opp_tables[cpu]); + } + + return ret; +} + +static struct platform_driver qcom_cpufreq_kryo_driver = { + .probe = qcom_cpufreq_kryo_probe, + .driver = { + .name = "qcom-cpufreq-kryo", + }, +}; + +static const struct of_device_id qcom_cpufreq_kryo_match_list[] __initconst = { + { .compatible = "qcom,apq8096", }, + { .compatible = "qcom,msm8996", }, +}; + +/* + * Since the driver depends on smem and nvmem drivers, which may + * return EPROBE_DEFER, all the real activity is done in the probe, + * which may be defered as well. The init here is only registering + * the driver and the platform device. + */ +static int __init qcom_cpufreq_kryo_init(void) +{ + struct device_node *np = of_find_node_by_path("/"); + const struct of_device_id *match; + int ret; + + if (!np) + return -ENODEV; + + match = of_match_node(qcom_cpufreq_kryo_match_list, np); + of_node_put(np); + if (!match) + return -ENODEV; + + ret = platform_driver_register(&qcom_cpufreq_kryo_driver); + if (unlikely(ret < 0)) + return ret; + + ret = PTR_ERR_OR_ZERO(platform_device_register_simple( + "qcom-cpufreq-kryo", -1, NULL, 0)); + if (0 == ret) + return 0; + + platform_driver_unregister(&qcom_cpufreq_kryo_driver); + return ret; +} +module_init(qcom_cpufreq_kryo_init); + +MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Kryo CPUfreq driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/cpufreq/s3c2440-cpufreq.c b/drivers/cpufreq/s3c2440-cpufreq.c index d0d75b65ddd6df..d2f67b7a20dd3d 100644 --- a/drivers/cpufreq/s3c2440-cpufreq.c +++ b/drivers/cpufreq/s3c2440-cpufreq.c @@ -143,7 +143,7 @@ static void s3c2440_cpufreq_setdivs(struct s3c_cpufreq_config *cfg) { unsigned long clkdiv, camdiv; - s3c_freq_dbg("%s: divsiors: h=%d, p=%d\n", __func__, + s3c_freq_dbg("%s: divisors: h=%d, p=%d\n", __func__, cfg->divs.h_divisor, cfg->divs.p_divisor); clkdiv = __raw_readl(S3C2410_CLKDIVN); diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c index e3a9962ee4109b..cabb6f48eb7798 100644 --- a/drivers/cpufreq/speedstep-lib.c +++ b/drivers/cpufreq/speedstep-lib.c @@ -252,7 +252,7 @@ EXPORT_SYMBOL_GPL(speedstep_get_frequency); *********************************************************************/ /* Keep in sync with the x86_cpu_id tables in the different modules */ -unsigned int speedstep_detect_processor(void) +enum speedstep_processor speedstep_detect_processor(void) { struct cpuinfo_x86 *c = &cpu_data(0); u32 ebx, msr_lo, msr_hi; diff --git a/drivers/cpufreq/tegra20-cpufreq.c b/drivers/cpufreq/tegra20-cpufreq.c index 2bd62845e9d558..05f57dcd5215b7 100644 --- a/drivers/cpufreq/tegra20-cpufreq.c +++ b/drivers/cpufreq/tegra20-cpufreq.c @@ -16,16 +16,13 @@ * */ -#include -#include -#include -#include +#include #include -#include -#include #include -#include -#include +#include +#include +#include +#include static struct cpufreq_frequency_table freq_table[] = { { .frequency = 216000 }, @@ -39,25 +36,27 @@ static struct cpufreq_frequency_table freq_table[] = { { .frequency = CPUFREQ_TABLE_END }, }; -#define NUM_CPUS 2 - -static struct clk *cpu_clk; -static struct clk *pll_x_clk; -static struct clk *pll_p_clk; -static struct clk *emc_clk; -static bool pll_x_prepared; +struct tegra20_cpufreq { + struct device *dev; + struct cpufreq_driver driver; + struct clk *cpu_clk; + struct clk *pll_x_clk; + struct clk *pll_p_clk; + bool pll_x_prepared; +}; static unsigned int tegra_get_intermediate(struct cpufreq_policy *policy, unsigned int index) { - unsigned int ifreq = clk_get_rate(pll_p_clk) / 1000; + struct tegra20_cpufreq *cpufreq = cpufreq_get_driver_data(); + unsigned int ifreq = clk_get_rate(cpufreq->pll_p_clk) / 1000; /* * Don't switch to intermediate freq if: * - we are already at it, i.e. policy->cur == ifreq * - index corresponds to ifreq */ - if ((freq_table[index].frequency == ifreq) || (policy->cur == ifreq)) + if (freq_table[index].frequency == ifreq || policy->cur == ifreq) return 0; return ifreq; @@ -66,6 +65,7 @@ static unsigned int tegra_get_intermediate(struct cpufreq_policy *policy, static int tegra_target_intermediate(struct cpufreq_policy *policy, unsigned int index) { + struct tegra20_cpufreq *cpufreq = cpufreq_get_driver_data(); int ret; /* @@ -78,47 +78,37 @@ static int tegra_target_intermediate(struct cpufreq_policy *policy, * Also, we wouldn't be using pll_x anymore and must not take extra * reference to it, as it can be disabled now to save some power. */ - clk_prepare_enable(pll_x_clk); + clk_prepare_enable(cpufreq->pll_x_clk); - ret = clk_set_parent(cpu_clk, pll_p_clk); + ret = clk_set_parent(cpufreq->cpu_clk, cpufreq->pll_p_clk); if (ret) - clk_disable_unprepare(pll_x_clk); + clk_disable_unprepare(cpufreq->pll_x_clk); else - pll_x_prepared = true; + cpufreq->pll_x_prepared = true; return ret; } static int tegra_target(struct cpufreq_policy *policy, unsigned int index) { + struct tegra20_cpufreq *cpufreq = cpufreq_get_driver_data(); unsigned long rate = freq_table[index].frequency; - unsigned int ifreq = clk_get_rate(pll_p_clk) / 1000; - int ret = 0; - - /* - * Vote on memory bus frequency based on cpu frequency - * This sets the minimum frequency, display or avp may request higher - */ - if (rate >= 816000) - clk_set_rate(emc_clk, 600000000); /* cpu 816 MHz, emc max */ - else if (rate >= 456000) - clk_set_rate(emc_clk, 300000000); /* cpu 456 MHz, emc 150Mhz */ - else - clk_set_rate(emc_clk, 100000000); /* emc 50Mhz */ + unsigned int ifreq = clk_get_rate(cpufreq->pll_p_clk) / 1000; + int ret; /* * target freq == pll_p, don't need to take extra reference to pll_x_clk * as it isn't used anymore. */ if (rate == ifreq) - return clk_set_parent(cpu_clk, pll_p_clk); + return clk_set_parent(cpufreq->cpu_clk, cpufreq->pll_p_clk); - ret = clk_set_rate(pll_x_clk, rate * 1000); + ret = clk_set_rate(cpufreq->pll_x_clk, rate * 1000); /* Restore to earlier frequency on error, i.e. pll_x */ if (ret) - pr_err("Failed to change pll_x to %lu\n", rate); + dev_err(cpufreq->dev, "Failed to change pll_x to %lu\n", rate); - ret = clk_set_parent(cpu_clk, pll_x_clk); + ret = clk_set_parent(cpufreq->cpu_clk, cpufreq->pll_x_clk); /* This shouldn't fail while changing or restoring */ WARN_ON(ret); @@ -126,9 +116,9 @@ static int tegra_target(struct cpufreq_policy *policy, unsigned int index) * Drop count to pll_x clock only if we switched to intermediate freq * earlier while transitioning to a target frequency. */ - if (pll_x_prepared) { - clk_disable_unprepare(pll_x_clk); - pll_x_prepared = false; + if (cpufreq->pll_x_prepared) { + clk_disable_unprepare(cpufreq->pll_x_clk); + cpufreq->pll_x_prepared = false; } return ret; @@ -136,81 +126,111 @@ static int tegra_target(struct cpufreq_policy *policy, unsigned int index) static int tegra_cpu_init(struct cpufreq_policy *policy) { + struct tegra20_cpufreq *cpufreq = cpufreq_get_driver_data(); int ret; - if (policy->cpu >= NUM_CPUS) - return -EINVAL; - - clk_prepare_enable(emc_clk); - clk_prepare_enable(cpu_clk); + clk_prepare_enable(cpufreq->cpu_clk); /* FIXME: what's the actual transition time? */ ret = cpufreq_generic_init(policy, freq_table, 300 * 1000); if (ret) { - clk_disable_unprepare(cpu_clk); - clk_disable_unprepare(emc_clk); + clk_disable_unprepare(cpufreq->cpu_clk); return ret; } - policy->clk = cpu_clk; + policy->clk = cpufreq->cpu_clk; policy->suspend_freq = freq_table[0].frequency; return 0; } static int tegra_cpu_exit(struct cpufreq_policy *policy) { - clk_disable_unprepare(cpu_clk); - clk_disable_unprepare(emc_clk); + struct tegra20_cpufreq *cpufreq = cpufreq_get_driver_data(); + + clk_disable_unprepare(cpufreq->cpu_clk); return 0; } -static struct cpufreq_driver tegra_cpufreq_driver = { - .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, - .verify = cpufreq_generic_frequency_table_verify, - .get_intermediate = tegra_get_intermediate, - .target_intermediate = tegra_target_intermediate, - .target_index = tegra_target, - .get = cpufreq_generic_get, - .init = tegra_cpu_init, - .exit = tegra_cpu_exit, - .name = "tegra", - .attr = cpufreq_generic_attr, - .suspend = cpufreq_generic_suspend, -}; - -static int __init tegra_cpufreq_init(void) +static int tegra20_cpufreq_probe(struct platform_device *pdev) { - cpu_clk = clk_get_sys(NULL, "cclk"); - if (IS_ERR(cpu_clk)) - return PTR_ERR(cpu_clk); - - pll_x_clk = clk_get_sys(NULL, "pll_x"); - if (IS_ERR(pll_x_clk)) - return PTR_ERR(pll_x_clk); - - pll_p_clk = clk_get_sys(NULL, "pll_p"); - if (IS_ERR(pll_p_clk)) - return PTR_ERR(pll_p_clk); - - emc_clk = clk_get_sys("cpu", "emc"); - if (IS_ERR(emc_clk)) { - clk_put(cpu_clk); - return PTR_ERR(emc_clk); + struct tegra20_cpufreq *cpufreq; + int err; + + cpufreq = devm_kzalloc(&pdev->dev, sizeof(*cpufreq), GFP_KERNEL); + if (!cpufreq) + return -ENOMEM; + + cpufreq->cpu_clk = clk_get_sys(NULL, "cclk"); + if (IS_ERR(cpufreq->cpu_clk)) + return PTR_ERR(cpufreq->cpu_clk); + + cpufreq->pll_x_clk = clk_get_sys(NULL, "pll_x"); + if (IS_ERR(cpufreq->pll_x_clk)) { + err = PTR_ERR(cpufreq->pll_x_clk); + goto put_cpu; + } + + cpufreq->pll_p_clk = clk_get_sys(NULL, "pll_p"); + if (IS_ERR(cpufreq->pll_p_clk)) { + err = PTR_ERR(cpufreq->pll_p_clk); + goto put_pll_x; } - return cpufreq_register_driver(&tegra_cpufreq_driver); + cpufreq->dev = &pdev->dev; + cpufreq->driver.get = cpufreq_generic_get; + cpufreq->driver.attr = cpufreq_generic_attr; + cpufreq->driver.init = tegra_cpu_init; + cpufreq->driver.exit = tegra_cpu_exit; + cpufreq->driver.flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK; + cpufreq->driver.verify = cpufreq_generic_frequency_table_verify; + cpufreq->driver.suspend = cpufreq_generic_suspend; + cpufreq->driver.driver_data = cpufreq; + cpufreq->driver.target_index = tegra_target; + cpufreq->driver.get_intermediate = tegra_get_intermediate; + cpufreq->driver.target_intermediate = tegra_target_intermediate; + snprintf(cpufreq->driver.name, CPUFREQ_NAME_LEN, "tegra"); + + err = cpufreq_register_driver(&cpufreq->driver); + if (err) + goto put_pll_p; + + platform_set_drvdata(pdev, cpufreq); + + return 0; + +put_pll_p: + clk_put(cpufreq->pll_p_clk); +put_pll_x: + clk_put(cpufreq->pll_x_clk); +put_cpu: + clk_put(cpufreq->cpu_clk); + + return err; } -static void __exit tegra_cpufreq_exit(void) +static int tegra20_cpufreq_remove(struct platform_device *pdev) { - cpufreq_unregister_driver(&tegra_cpufreq_driver); - clk_put(emc_clk); - clk_put(cpu_clk); + struct tegra20_cpufreq *cpufreq = platform_get_drvdata(pdev); + + cpufreq_unregister_driver(&cpufreq->driver); + + clk_put(cpufreq->pll_p_clk); + clk_put(cpufreq->pll_x_clk); + clk_put(cpufreq->cpu_clk); + + return 0; } +static struct platform_driver tegra20_cpufreq_driver = { + .probe = tegra20_cpufreq_probe, + .remove = tegra20_cpufreq_remove, + .driver = { + .name = "tegra20-cpufreq", + }, +}; +module_platform_driver(tegra20_cpufreq_driver); +MODULE_ALIAS("platform:tegra20-cpufreq"); MODULE_AUTHOR("Colin Cross "); -MODULE_DESCRIPTION("cpufreq driver for Nvidia Tegra2"); +MODULE_DESCRIPTION("NVIDIA Tegra20 cpufreq driver"); MODULE_LICENSE("GPL"); -module_init(tegra_cpufreq_init); -module_exit(tegra_cpufreq_exit); diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index 5d359aff3cc538..9fed1b82929278 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -8,8 +8,10 @@ * This code is licenced under the GPL. */ -#include +#include #include +#include +#include #include "cpuidle.h" @@ -93,3 +95,16 @@ int cpuidle_register_governor(struct cpuidle_governor *gov) return ret; } + +/** + * cpuidle_governor_latency_req - Compute a latency constraint for CPU + * @cpu: Target CPU + */ +int cpuidle_governor_latency_req(unsigned int cpu) +{ + int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); + struct device *device = get_cpu_device(cpu); + int device_req = dev_pm_qos_raw_read_value(device); + + return device_req < global_req ? device_req : global_req; +} diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index b24883f85c9954..704880a6612a76 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -14,10 +14,8 @@ #include #include -#include #include #include -#include #include #include @@ -69,16 +67,10 @@ static int ladder_select_state(struct cpuidle_driver *drv, struct cpuidle_device *dev, bool *dummy) { struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); - struct device *device = get_cpu_device(dev->cpu); struct ladder_device_state *last_state; int last_residency, last_idx = ldev->last_state_idx; int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0; - int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); - int resume_latency = dev_pm_qos_raw_read_value(device); - - if (resume_latency < latency_req && - resume_latency != PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) - latency_req = resume_latency; + int latency_req = cpuidle_governor_latency_req(dev->cpu); /* Special case when user has set very strict latency requirement */ if (unlikely(latency_req == 0)) { diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 1bfe03ceb236c3..1aef60d160eb0c 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -12,7 +12,6 @@ #include #include -#include #include #include #include @@ -21,7 +20,6 @@ #include #include #include -#include /* * Please note when changing the tuning values: @@ -286,15 +284,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, bool *stop_tick) { struct menu_device *data = this_cpu_ptr(&menu_devices); - struct device *device = get_cpu_device(dev->cpu); - int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); + int latency_req = cpuidle_governor_latency_req(dev->cpu); int i; int first_idx; int idx; unsigned int interactivity_req; unsigned int expected_interval; unsigned long nr_iowaiters, cpu_load; - int resume_latency = dev_pm_qos_raw_read_value(device); ktime_t delta_next; if (data->needs_update) { @@ -302,10 +298,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, data->needs_update = 0; } - if (resume_latency < latency_req && - resume_latency != PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) - latency_req = resume_latency; - /* Special case when user has set very strict latency requirement */ if (unlikely(latency_req == 0)) { *stop_tick = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index a29362f9ef415d..12558044acd46f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -513,7 +513,7 @@ static int acp_hw_fini(void *handle) if (adev->acp.acp_genpd) { for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev); + ret = pm_genpd_remove_device(dev); /* If removal fails, dont giveup and try rest */ if (ret) dev_err(dev, "remove dev from genpd failed\n"); diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 1ba40bb2b966a9..a17f46a95f73d6 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -363,7 +363,7 @@ static int i2c_device_probe(struct device *dev) goto err_clear_wakeup_irq; status = dev_pm_domain_attach(&client->dev, true); - if (status == -EPROBE_DEFER) + if (status) goto err_clear_wakeup_irq; /* diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index 2b32b88949ba40..b6d8203e46ebe1 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -139,7 +139,7 @@ static int sdio_bus_probe(struct device *dev) return -ENODEV; ret = dev_pm_domain_attach(dev, false); - if (ret == -EPROBE_DEFER) + if (ret) return ret; /* Unbound SDIO functions are always suspended. diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 92fa94a6dcc111..ab2f3fead6b1ce 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -33,8 +33,6 @@ LIST_HEAD(opp_tables); /* Lock to allow exclusive modification to the device and opp lists */ DEFINE_MUTEX(opp_table_lock); -static void dev_pm_opp_get(struct dev_pm_opp *opp); - static struct opp_device *_find_opp_dev(const struct device *dev, struct opp_table *opp_table) { @@ -281,6 +279,23 @@ unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp_freq); +int _get_opp_count(struct opp_table *opp_table) +{ + struct dev_pm_opp *opp; + int count = 0; + + mutex_lock(&opp_table->lock); + + list_for_each_entry(opp, &opp_table->opp_list, node) { + if (opp->available) + count++; + } + + mutex_unlock(&opp_table->lock); + + return count; +} + /** * dev_pm_opp_get_opp_count() - Get number of opps available in the opp table * @dev: device for which we do this operation @@ -291,25 +306,17 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp_freq); int dev_pm_opp_get_opp_count(struct device *dev) { struct opp_table *opp_table; - struct dev_pm_opp *temp_opp; - int count = 0; + int count; opp_table = _find_opp_table(dev); if (IS_ERR(opp_table)) { count = PTR_ERR(opp_table); dev_dbg(dev, "%s: OPP table not found (%d)\n", __func__, count); - return count; - } - - mutex_lock(&opp_table->lock); - - list_for_each_entry(temp_opp, &opp_table->opp_list, node) { - if (temp_opp->available) - count++; + return 0; } - mutex_unlock(&opp_table->lock); + count = _get_opp_count(opp_table); dev_pm_opp_put_opp_table(opp_table); return count; @@ -892,7 +899,7 @@ static void _opp_kref_release(struct kref *kref) dev_pm_opp_put_opp_table(opp_table); } -static void dev_pm_opp_get(struct dev_pm_opp *opp) +void dev_pm_opp_get(struct dev_pm_opp *opp) { kref_get(&opp->kref); } @@ -985,22 +992,11 @@ static bool _opp_supported_by_regulators(struct dev_pm_opp *opp, return true; } -/* - * Returns: - * 0: On success. And appropriate error message for duplicate OPPs. - * -EBUSY: For OPP with same freq/volt and is available. The callers of - * _opp_add() must return 0 if they receive -EBUSY from it. This is to make - * sure we don't print error messages unnecessarily if different parts of - * kernel try to initialize the OPP table. - * -EEXIST: For OPP with same freq but different volt or is unavailable. This - * should be considered an error by the callers of _opp_add(). - */ -int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, - struct opp_table *opp_table) +static int _opp_is_duplicate(struct device *dev, struct dev_pm_opp *new_opp, + struct opp_table *opp_table, + struct list_head **head) { struct dev_pm_opp *opp; - struct list_head *head; - int ret; /* * Insert new OPP in order of increasing frequency and discard if @@ -1010,17 +1006,14 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, * loop, don't replace it with head otherwise it will become an infinite * loop. */ - mutex_lock(&opp_table->lock); - head = &opp_table->opp_list; - list_for_each_entry(opp, &opp_table->opp_list, node) { if (new_opp->rate > opp->rate) { - head = &opp->node; + *head = &opp->node; continue; } if (new_opp->rate < opp->rate) - break; + return 0; /* Duplicate OPPs */ dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n", @@ -1029,15 +1022,39 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, new_opp->supplies[0].u_volt, new_opp->available); /* Should we compare voltages for all regulators here ? */ - ret = opp->available && - new_opp->supplies[0].u_volt == opp->supplies[0].u_volt ? -EBUSY : -EEXIST; - - mutex_unlock(&opp_table->lock); - return ret; + return opp->available && + new_opp->supplies[0].u_volt == opp->supplies[0].u_volt ? -EBUSY : -EEXIST; } - if (opp_table->get_pstate) - new_opp->pstate = opp_table->get_pstate(dev, new_opp->rate); + return 0; +} + +/* + * Returns: + * 0: On success. And appropriate error message for duplicate OPPs. + * -EBUSY: For OPP with same freq/volt and is available. The callers of + * _opp_add() must return 0 if they receive -EBUSY from it. This is to make + * sure we don't print error messages unnecessarily if different parts of + * kernel try to initialize the OPP table. + * -EEXIST: For OPP with same freq but different volt or is unavailable. This + * should be considered an error by the callers of _opp_add(). + */ +int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, + struct opp_table *opp_table, bool rate_not_available) +{ + struct list_head *head; + int ret; + + mutex_lock(&opp_table->lock); + head = &opp_table->opp_list; + + if (likely(!rate_not_available)) { + ret = _opp_is_duplicate(dev, new_opp, opp_table, &head); + if (ret) { + mutex_unlock(&opp_table->lock); + return ret; + } + } list_add(&new_opp->node, head); mutex_unlock(&opp_table->lock); @@ -1104,7 +1121,7 @@ int _opp_add_v1(struct opp_table *opp_table, struct device *dev, new_opp->available = true; new_opp->dynamic = dynamic; - ret = _opp_add(dev, new_opp, opp_table); + ret = _opp_add(dev, new_opp, opp_table, false); if (ret) { /* Don't return error for duplicate OPPs */ if (ret == -EBUSY) @@ -1140,7 +1157,6 @@ struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count) { struct opp_table *opp_table; - int ret; opp_table = dev_pm_opp_get_opp_table(dev); if (!opp_table) @@ -1149,29 +1165,20 @@ struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev, /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); - /* Do we already have a version hierarchy associated with opp_table? */ - if (opp_table->supported_hw) { - dev_err(dev, "%s: Already have supported hardware list\n", - __func__); - ret = -EBUSY; - goto err; - } + /* Another CPU that shares the OPP table has set the property ? */ + if (opp_table->supported_hw) + return opp_table; opp_table->supported_hw = kmemdup(versions, count * sizeof(*versions), GFP_KERNEL); if (!opp_table->supported_hw) { - ret = -ENOMEM; - goto err; + dev_pm_opp_put_opp_table(opp_table); + return ERR_PTR(-ENOMEM); } opp_table->supported_hw_count = count; return opp_table; - -err: - dev_pm_opp_put_opp_table(opp_table); - - return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw); @@ -1188,12 +1195,6 @@ void dev_pm_opp_put_supported_hw(struct opp_table *opp_table) /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); - if (!opp_table->supported_hw) { - pr_err("%s: Doesn't have supported hardware list\n", - __func__); - return; - } - kfree(opp_table->supported_hw); opp_table->supported_hw = NULL; opp_table->supported_hw_count = 0; @@ -1215,7 +1216,6 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw); struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) { struct opp_table *opp_table; - int ret; opp_table = dev_pm_opp_get_opp_table(dev); if (!opp_table) @@ -1224,26 +1224,17 @@ struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); - /* Do we already have a prop-name associated with opp_table? */ - if (opp_table->prop_name) { - dev_err(dev, "%s: Already have prop-name %s\n", __func__, - opp_table->prop_name); - ret = -EBUSY; - goto err; - } + /* Another CPU that shares the OPP table has set the property ? */ + if (opp_table->prop_name) + return opp_table; opp_table->prop_name = kstrdup(name, GFP_KERNEL); if (!opp_table->prop_name) { - ret = -ENOMEM; - goto err; + dev_pm_opp_put_opp_table(opp_table); + return ERR_PTR(-ENOMEM); } return opp_table; - -err: - dev_pm_opp_put_opp_table(opp_table); - - return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name); @@ -1260,11 +1251,6 @@ void dev_pm_opp_put_prop_name(struct opp_table *opp_table) /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); - if (!opp_table->prop_name) { - pr_err("%s: Doesn't have a prop-name\n", __func__); - return; - } - kfree(opp_table->prop_name); opp_table->prop_name = NULL; @@ -1334,11 +1320,9 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev, goto err; } - /* Already have regulators set */ - if (opp_table->regulators) { - ret = -EBUSY; - goto err; - } + /* Another CPU that shares the OPP table has set the regulators ? */ + if (opp_table->regulators) + return opp_table; opp_table->regulators = kmalloc_array(count, sizeof(*opp_table->regulators), @@ -1392,10 +1376,8 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table) { int i; - if (!opp_table->regulators) { - pr_err("%s: Doesn't have regulators set\n", __func__); - return; - } + if (!opp_table->regulators) + goto put_opp_table; /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); @@ -1409,6 +1391,7 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table) opp_table->regulators = NULL; opp_table->regulator_count = 0; +put_opp_table: dev_pm_opp_put_opp_table(opp_table); } EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulators); @@ -1494,7 +1477,6 @@ struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)) { struct opp_table *opp_table; - int ret; if (!set_opp) return ERR_PTR(-EINVAL); @@ -1505,24 +1487,15 @@ struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, /* This should be called before OPPs are initialized */ if (WARN_ON(!list_empty(&opp_table->opp_list))) { - ret = -EBUSY; - goto err; - } - - /* Already have custom set_opp helper */ - if (WARN_ON(opp_table->set_opp)) { - ret = -EBUSY; - goto err; + dev_pm_opp_put_opp_table(opp_table); + return ERR_PTR(-EBUSY); } - opp_table->set_opp = set_opp; + /* Another CPU that shares the OPP table has set the helper ? */ + if (!opp_table->set_opp) + opp_table->set_opp = set_opp; return opp_table; - -err: - dev_pm_opp_put_opp_table(opp_table); - - return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(dev_pm_opp_register_set_opp_helper); @@ -1535,96 +1508,14 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_register_set_opp_helper); */ void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) { - if (!opp_table->set_opp) { - pr_err("%s: Doesn't have custom set_opp helper set\n", - __func__); - return; - } - /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); opp_table->set_opp = NULL; - dev_pm_opp_put_opp_table(opp_table); } EXPORT_SYMBOL_GPL(dev_pm_opp_unregister_set_opp_helper); -/** - * dev_pm_opp_register_get_pstate_helper() - Register get_pstate() helper. - * @dev: Device for which the helper is getting registered. - * @get_pstate: Helper. - * - * TODO: Remove this callback after the same information is available via Device - * Tree. - * - * This allows a platform to initialize the performance states of individual - * OPPs for its devices, until we get similar information directly from DT. - * - * This must be called before the OPPs are initialized for the device. - */ -struct opp_table *dev_pm_opp_register_get_pstate_helper(struct device *dev, - int (*get_pstate)(struct device *dev, unsigned long rate)) -{ - struct opp_table *opp_table; - int ret; - - if (!get_pstate) - return ERR_PTR(-EINVAL); - - opp_table = dev_pm_opp_get_opp_table(dev); - if (!opp_table) - return ERR_PTR(-ENOMEM); - - /* This should be called before OPPs are initialized */ - if (WARN_ON(!list_empty(&opp_table->opp_list))) { - ret = -EBUSY; - goto err; - } - - /* Already have genpd_performance_state set */ - if (WARN_ON(opp_table->genpd_performance_state)) { - ret = -EBUSY; - goto err; - } - - opp_table->genpd_performance_state = true; - opp_table->get_pstate = get_pstate; - - return opp_table; - -err: - dev_pm_opp_put_opp_table(opp_table); - - return ERR_PTR(ret); -} -EXPORT_SYMBOL_GPL(dev_pm_opp_register_get_pstate_helper); - -/** - * dev_pm_opp_unregister_get_pstate_helper() - Releases resources blocked for - * get_pstate() helper - * @opp_table: OPP table returned from dev_pm_opp_register_get_pstate_helper(). - * - * Release resources blocked for platform specific get_pstate() helper. - */ -void dev_pm_opp_unregister_get_pstate_helper(struct opp_table *opp_table) -{ - if (!opp_table->genpd_performance_state) { - pr_err("%s: Doesn't have performance states set\n", - __func__); - return; - } - - /* Make sure there are no concurrent readers while updating opp_table */ - WARN_ON(!list_empty(&opp_table->opp_list)); - - opp_table->genpd_performance_state = false; - opp_table->get_pstate = NULL; - - dev_pm_opp_put_opp_table(opp_table); -} -EXPORT_SYMBOL_GPL(dev_pm_opp_unregister_get_pstate_helper); - /** * dev_pm_opp_add() - Add an OPP table from a table definitions * @dev: device for which we do this operation diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index b03c03576a624f..e6828e5f81b01a 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -77,10 +77,21 @@ int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table) { struct dentry *pdentry = opp_table->dentry; struct dentry *d; + unsigned long id; char name[25]; /* 20 chars for 64 bit value + 5 (opp:\0) */ - /* Rate is unique to each OPP, use it to give opp-name */ - snprintf(name, sizeof(name), "opp:%lu", opp->rate); + /* + * Get directory name for OPP. + * + * - Normally rate is unique to each OPP, use it to get unique opp-name. + * - For some devices rate isn't available, use index instead. + */ + if (likely(opp->rate)) + id = opp->rate; + else + id = _get_opp_count(opp_table); + + snprintf(name, sizeof(name), "opp:%lu", id); /* Create per-opp directory */ d = debugfs_create_dir(name, pdentry); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index cb716aa2f44be9..7af0ddec936bb4 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -250,20 +251,17 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table); /* Returns opp descriptor node for a device node, caller must * do of_node_put() */ -static struct device_node *_opp_of_get_opp_desc_node(struct device_node *np) +static struct device_node *_opp_of_get_opp_desc_node(struct device_node *np, + int index) { - /* - * There should be only ONE phandle present in "operating-points-v2" - * property. - */ - - return of_parse_phandle(np, "operating-points-v2", 0); + /* "operating-points-v2" can be an array for power domain providers */ + return of_parse_phandle(np, "operating-points-v2", index); } /* Returns opp descriptor node for a device, caller must do of_node_put() */ struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev) { - return _opp_of_get_opp_desc_node(dev->of_node); + return _opp_of_get_opp_desc_node(dev->of_node, 0); } EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_opp_desc_node); @@ -289,9 +287,10 @@ static int _opp_add_static_v2(struct opp_table *opp_table, struct device *dev, struct device_node *np) { struct dev_pm_opp *new_opp; - u64 rate; + u64 rate = 0; u32 val; int ret; + bool rate_not_available = false; new_opp = _opp_allocate(opp_table); if (!new_opp) @@ -299,8 +298,21 @@ static int _opp_add_static_v2(struct opp_table *opp_table, struct device *dev, ret = of_property_read_u64(np, "opp-hz", &rate); if (ret < 0) { - dev_err(dev, "%s: opp-hz not found\n", __func__); - goto free_opp; + /* "opp-hz" is optional for devices like power domains. */ + if (!of_find_property(dev->of_node, "#power-domain-cells", + NULL)) { + dev_err(dev, "%s: opp-hz not found\n", __func__); + goto free_opp; + } + + rate_not_available = true; + } else { + /* + * Rate is defined as an unsigned long in clk API, and so + * casting explicitly to its type. Must be fixed once rate is 64 + * bit guaranteed in clk API. + */ + new_opp->rate = (unsigned long)rate; } /* Check if the OPP supports hardware's hierarchy of versions or not */ @@ -309,12 +321,6 @@ static int _opp_add_static_v2(struct opp_table *opp_table, struct device *dev, goto free_opp; } - /* - * Rate is defined as an unsigned long in clk API, and so casting - * explicitly to its type. Must be fixed once rate is 64 bit - * guaranteed in clk API. - */ - new_opp->rate = (unsigned long)rate; new_opp->turbo = of_property_read_bool(np, "turbo-mode"); new_opp->np = np; @@ -324,11 +330,13 @@ static int _opp_add_static_v2(struct opp_table *opp_table, struct device *dev, if (!of_property_read_u32(np, "clock-latency-ns", &val)) new_opp->clock_latency_ns = val; + new_opp->pstate = of_genpd_opp_to_performance_state(dev, np); + ret = opp_parse_supplies(new_opp, dev, opp_table); if (ret) goto free_opp; - ret = _opp_add(dev, new_opp, opp_table); + ret = _opp_add(dev, new_opp, opp_table, rate_not_available); if (ret) { /* Don't return error for duplicate OPPs */ if (ret == -EBUSY) @@ -374,7 +382,8 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) { struct device_node *np; struct opp_table *opp_table; - int ret = 0, count = 0; + int ret = 0, count = 0, pstate_count = 0; + struct dev_pm_opp *opp; opp_table = _managed_opp(opp_np); if (opp_table) { @@ -408,6 +417,20 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) goto put_opp_table; } + list_for_each_entry(opp, &opp_table->opp_list, node) + pstate_count += !!opp->pstate; + + /* Either all or none of the nodes shall have performance state set */ + if (pstate_count && pstate_count != count) { + dev_err(dev, "Not all nodes have performance state set (%d: %d)\n", + count, pstate_count); + ret = -ENOENT; + goto put_opp_table; + } + + if (pstate_count) + opp_table->genpd_performance_state = true; + opp_table->np = opp_np; if (of_property_read_bool(opp_np, "opp-shared")) opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED; @@ -509,6 +532,54 @@ int dev_pm_opp_of_add_table(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table); +/** + * dev_pm_opp_of_add_table_indexed() - Initialize indexed opp table from device tree + * @dev: device pointer used to lookup OPP table. + * @index: Index number. + * + * Register the initial OPP table with the OPP library for given device only + * using the "operating-points-v2" property. + * + * Return: + * 0 On success OR + * Duplicate OPPs (both freq and volt are same) and opp->available + * -EEXIST Freq are same and volt are different OR + * Duplicate OPPs (both freq and volt are same) and !opp->available + * -ENOMEM Memory allocation failure + * -ENODEV when 'operating-points' property is not found or is invalid data + * in device node. + * -ENODATA when empty 'operating-points' property is found + * -EINVAL when invalid entries are found in opp-v2 table + */ +int dev_pm_opp_of_add_table_indexed(struct device *dev, int index) +{ + struct device_node *opp_np; + int ret, count; + +again: + opp_np = _opp_of_get_opp_desc_node(dev->of_node, index); + if (!opp_np) { + /* + * If only one phandle is present, then the same OPP table + * applies for all index requests. + */ + count = of_count_phandle_with_args(dev->of_node, + "operating-points-v2", NULL); + if (count == 1 && index) { + index = 0; + goto again; + } + + return -ENODEV; + } + + ret = _of_add_opp_table_v2(dev, opp_np); + of_node_put(opp_np); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table_indexed); + /* CPU device specific helpers */ /** @@ -613,7 +684,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, } /* Get OPP descriptor node */ - tmp_np = _opp_of_get_opp_desc_node(cpu_np); + tmp_np = _opp_of_get_opp_desc_node(cpu_np, 0); of_node_put(cpu_np); if (!tmp_np) { pr_err("%pOF: Couldn't find opp node\n", cpu_np); @@ -633,3 +704,76 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, return ret; } EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus); + +/** + * of_dev_pm_opp_find_required_opp() - Search for required OPP. + * @dev: The device whose OPP node is referenced by the 'np' DT node. + * @np: Node that contains the "required-opps" property. + * + * Returns the OPP of the device 'dev', whose phandle is present in the "np" + * node. Although the "required-opps" property supports having multiple + * phandles, this helper routine only parses the very first phandle in the list. + * + * Return: Matching opp, else returns ERR_PTR in case of error and should be + * handled using IS_ERR. + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. + */ +struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev, + struct device_node *np) +{ + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ENODEV); + struct device_node *required_np; + struct opp_table *opp_table; + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return ERR_CAST(opp_table); + + required_np = of_parse_phandle(np, "required-opps", 0); + if (unlikely(!required_np)) { + dev_err(dev, "Unable to parse required-opps\n"); + goto put_opp_table; + } + + mutex_lock(&opp_table->lock); + + list_for_each_entry(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->available && temp_opp->np == required_np) { + opp = temp_opp; + + /* Increment the reference count of OPP */ + dev_pm_opp_get(opp); + break; + } + } + + mutex_unlock(&opp_table->lock); + + of_node_put(required_np); +put_opp_table: + dev_pm_opp_put_opp_table(opp_table); + + return opp; +} +EXPORT_SYMBOL_GPL(of_dev_pm_opp_find_required_opp); + +/** + * dev_pm_opp_get_of_node() - Gets the DT node corresponding to an opp + * @opp: opp for which DT node has to be returned for + * + * Return: DT node corresponding to the opp, else 0 on success. + * + * The caller needs to put the node with of_node_put() after using it. + */ +struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp) +{ + if (IS_ERR_OR_NULL(opp)) { + pr_err("%s: Invalid parameters\n", __func__); + return NULL; + } + + return of_node_get(opp->np); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node); diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 4d00061648a354..7c540fd063b2dc 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -140,7 +140,6 @@ enum opp_table_access { * @genpd_performance_state: Device's power domain support performance state. * @set_opp: Platform specific set_opp callback * @set_opp_data: Data to be passed to set_opp callback - * @get_pstate: Platform specific get_pstate callback * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. * @@ -178,7 +177,6 @@ struct opp_table { int (*set_opp)(struct dev_pm_set_opp_data *data); struct dev_pm_set_opp_data *set_opp_data; - int (*get_pstate)(struct device *dev, unsigned long rate); #ifdef CONFIG_DEBUG_FS struct dentry *dentry; @@ -187,14 +185,16 @@ struct opp_table { }; /* Routines internal to opp core */ +void dev_pm_opp_get(struct dev_pm_opp *opp); void _get_opp_table_kref(struct opp_table *opp_table); +int _get_opp_count(struct opp_table *opp_table); struct opp_table *_find_opp_table(struct device *dev); struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table); void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev, bool remove_all); void _dev_pm_opp_find_and_remove_table(struct device *dev, bool remove_all); struct dev_pm_opp *_opp_allocate(struct opp_table *opp_table); void _opp_free(struct dev_pm_opp *opp); -int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table); +int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table, bool rate_not_available); int _opp_add_v1(struct opp_table *opp_table, struct device *dev, unsigned long freq, long u_volt, bool dynamic); void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of); struct opp_table *_add_opp_table(struct device *dev); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index f8269a725667a5..18ba62c7648059 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -755,10 +755,11 @@ static int pci_pm_suspend(struct device *dev) * better to resume the device from runtime suspend here. */ if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) || - !pci_dev_keep_suspended(pci_dev)) + !pci_dev_keep_suspended(pci_dev)) { pm_runtime_resume(dev); + pci_dev->state_saved = false; + } - pci_dev->state_saved = false; if (pm->suspend) { pci_power_t prev = pci_dev->current_state; int error; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index dbfe7c4f377689..e90cf5c32e1432 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2025,8 +2025,7 @@ static pci_power_t pci_target_state(struct pci_dev *dev, bool wakeup) if (platform_pci_power_manageable(dev)) { /* - * Call the platform to choose the target state of the device - * and enable wake-up from this state if supported. + * Call the platform to find the target state for the device. */ pci_power_t state = platform_pci_choose_state(dev); @@ -2059,8 +2058,7 @@ static pci_power_t pci_target_state(struct pci_dev *dev, bool wakeup) if (wakeup) { /* * Find the deepest state from which the device can generate - * wake-up events, make it the target state and enable device - * to generate PME#. + * PME#. */ if (dev->pme_support) { while (target_state diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c index ed2b109ae8fcba..d6a5e6bf5f12a9 100644 --- a/drivers/power/avs/rockchip-io-domain.c +++ b/drivers/power/avs/rockchip-io-domain.c @@ -39,6 +39,10 @@ #define MAX_VOLTAGE_1_8 1980000 #define MAX_VOLTAGE_3_3 3600000 +#define PX30_IO_VSEL 0x180 +#define PX30_IO_VSEL_VCCIO6_SRC BIT(0) +#define PX30_IO_VSEL_VCCIO6_SUPPLY_NUM 1 + #define RK3288_SOC_CON2 0x24c #define RK3288_SOC_CON2_FLASH0 BIT(7) #define RK3288_SOC_FLASH_SUPPLY_NUM 2 @@ -151,6 +155,25 @@ static int rockchip_iodomain_notify(struct notifier_block *nb, return NOTIFY_OK; } +static void px30_iodomain_init(struct rockchip_iodomain *iod) +{ + int ret; + u32 val; + + /* if no VCCIO0 supply we should leave things alone */ + if (!iod->supplies[PX30_IO_VSEL_VCCIO6_SUPPLY_NUM].reg) + return; + + /* + * set vccio0 iodomain to also use this framework + * instead of a special gpio. + */ + val = PX30_IO_VSEL_VCCIO6_SRC | (PX30_IO_VSEL_VCCIO6_SRC << 16); + ret = regmap_write(iod->grf, PX30_IO_VSEL, val); + if (ret < 0) + dev_warn(iod->dev, "couldn't update vccio0 ctrl\n"); +} + static void rk3288_iodomain_init(struct rockchip_iodomain *iod) { int ret; @@ -227,6 +250,43 @@ static void rk3399_pmu_iodomain_init(struct rockchip_iodomain *iod) dev_warn(iod->dev, "couldn't update pmu io iodomain ctrl\n"); } +static const struct rockchip_iodomain_soc_data soc_data_px30 = { + .grf_offset = 0x180, + .supply_names = { + NULL, + "vccio6", + "vccio1", + "vccio2", + "vccio3", + "vccio4", + "vccio5", + "vccio-oscgpi", + }, + .init = px30_iodomain_init, +}; + +static const struct rockchip_iodomain_soc_data soc_data_px30_pmu = { + .grf_offset = 0x100, + .supply_names = { + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "pmuio1", + "pmuio2", + }, +}; + /* * On the rk3188 the io-domains are handled by a shared register with the * lower 8 bits being still being continuing drive-strength settings. @@ -380,6 +440,14 @@ static const struct rockchip_iodomain_soc_data soc_data_rv1108_pmu = { }; static const struct of_device_id rockchip_iodomain_match[] = { + { + .compatible = "rockchip,px30-io-voltage-domain", + .data = (void *)&soc_data_px30 + }, + { + .compatible = "rockchip,px30-pmu-io-voltage-domain", + .data = (void *)&soc_data_px30_pmu + }, { .compatible = "rockchip,rk3188-io-voltage-domain", .data = &soc_data_rk3188 diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index d9fcdb592b3966..3e3d12ce458727 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -559,22 +559,28 @@ EXPORT_SYMBOL(tegra_powergate_remove_clamping); int tegra_powergate_sequence_power_up(unsigned int id, struct clk *clk, struct reset_control *rst) { - struct tegra_powergate pg; + struct tegra_powergate *pg; int err; if (!tegra_powergate_is_available(id)) return -EINVAL; - pg.id = id; - pg.clks = &clk; - pg.num_clks = 1; - pg.reset = rst; - pg.pmc = pmc; + pg = kzalloc(sizeof(*pg), GFP_KERNEL); + if (!pg) + return -ENOMEM; - err = tegra_powergate_power_up(&pg, false); + pg->id = id; + pg->clks = &clk; + pg->num_clks = 1; + pg->reset = rst; + pg->pmc = pmc; + + err = tegra_powergate_power_up(pg, false); if (err) pr_err("failed to turn on partition %d: %d\n", id, err); + kfree(pg); + return err; } EXPORT_SYMBOL(tegra_powergate_sequence_power_up); diff --git a/drivers/soundwire/bus_type.c b/drivers/soundwire/bus_type.c index d5f3a70c06b059..283b2832728ee0 100644 --- a/drivers/soundwire/bus_type.c +++ b/drivers/soundwire/bus_type.c @@ -83,17 +83,16 @@ static int sdw_drv_probe(struct device *dev) * attach to power domain but don't turn on (last arg) */ ret = dev_pm_domain_attach(dev, false); - if (ret != -EPROBE_DEFER) { - ret = drv->probe(slave, id); - if (ret) { - dev_err(dev, "Probe of %s failed: %d\n", drv->name, ret); - dev_pm_domain_detach(dev, false); - } - } - if (ret) return ret; + ret = drv->probe(slave, id); + if (ret) { + dev_err(dev, "Probe of %s failed: %d\n", drv->name, ret); + dev_pm_domain_detach(dev, false); + return ret; + } + /* device is probed so let's read the properties now */ if (slave->ops && slave->ops->read_prop) slave->ops->read_prop(slave); diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 20b5b2754830ec..efc624f9e490e6 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -359,11 +359,12 @@ static int spi_drv_probe(struct device *dev) } ret = dev_pm_domain_attach(dev, true); - if (ret != -EPROBE_DEFER) { - ret = sdrv->probe(spi); - if (ret) - dev_pm_domain_detach(dev, true); - } + if (ret) + return ret; + + ret = sdrv->probe(spi); + if (ret) + dev_pm_domain_detach(dev, true); return ret; } diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index da0215ea9f44e7..c01d790c17c2e4 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -753,6 +753,7 @@ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status u32 gpe_number, acpi_event_status *event_status)) +ACPI_HW_DEPENDENT_RETURN_VOID(void acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void)) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 15bfb15c2fa5e1..c01675b3d93f45 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -899,7 +899,7 @@ static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; } static inline int acpi_dev_pm_attach(struct device *dev, bool power_on) { - return -ENODEV; + return 0; } #endif diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 87f48dd932eb1e..882a9b9e34bc2c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -571,7 +571,7 @@ struct governor_attr { size_t count); }; -static inline bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy) +static inline bool cpufreq_this_cpu_can_update(struct cpufreq_policy *policy) { /* * Allow remote callbacks if: diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 1eefabf1621f40..4325d6fdde9b64 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -258,6 +258,7 @@ struct cpuidle_governor { #ifdef CONFIG_CPU_IDLE extern int cpuidle_register_governor(struct cpuidle_governor *gov); +extern int cpuidle_governor_latency_req(unsigned int cpu); #else static inline int cpuidle_register_governor(struct cpuidle_governor *gov) {return 0;} diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 6a1eb0b0aad96f..7aed926245311e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -542,6 +542,7 @@ extern enum system_states { SYSTEM_HALT, SYSTEM_POWER_OFF, SYSTEM_RESTART, + SYSTEM_SUSPEND, } system_state; /* This cannot be an enum because some may be used in assembly source. */ diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 6d731110e0db14..f35c7bf7614302 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -43,9 +43,7 @@ * in srcu_notifier_call_chain(): no cache bounces and no memory barriers. * As compensation, srcu_notifier_chain_unregister() is rather expensive. * SRCU notifier chains should be used when the chain will be called very - * often but notifier_blocks will seldom be removed. Also, SRCU notifier - * chains are slightly more difficult to use because they require special - * runtime initialization. + * often but notifier_blocks will seldom be removed. */ struct notifier_block; @@ -91,7 +89,7 @@ struct srcu_notifier_head { (name)->head = NULL; \ } while (0) -/* srcu_notifier_heads must be initialized and cleaned up dynamically */ +/* srcu_notifier_heads must be cleaned up dynamically */ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #define srcu_cleanup_notifier_head(name) \ cleanup_srcu_struct(&(name)->srcu); @@ -104,7 +102,13 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); .head = NULL } #define RAW_NOTIFIER_INIT(name) { \ .head = NULL } -/* srcu_notifier_heads cannot be initialized statically */ + +#define SRCU_NOTIFIER_INIT(name, pcpu) \ + { \ + .mutex = __MUTEX_INITIALIZER(name.mutex), \ + .head = NULL, \ + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \ + } #define ATOMIC_NOTIFIER_HEAD(name) \ struct atomic_notifier_head name = \ @@ -116,6 +120,26 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); struct raw_notifier_head name = \ RAW_NOTIFIER_INIT(name) +#ifdef CONFIG_TREE_SRCU +#define _SRCU_NOTIFIER_HEAD(name, mod) \ + static DEFINE_PER_CPU(struct srcu_data, \ + name##_head_srcu_data); \ + mod struct srcu_notifier_head name = \ + SRCU_NOTIFIER_INIT(name, name##_head_srcu_data) + +#else +#define _SRCU_NOTIFIER_HEAD(name, mod) \ + mod struct srcu_notifier_head name = \ + SRCU_NOTIFIER_INIT(name, name) + +#endif + +#define SRCU_NOTIFIER_HEAD(name) \ + _SRCU_NOTIFIER_HEAD(name, /* not static */) + +#define SRCU_NOTIFIER_HEAD_STATIC(name) \ + _SRCU_NOTIFIER_HEAD(name, static) + #ifdef __KERNEL__ extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 04dbef9847d3ad..42e0d649e65315 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -47,8 +47,10 @@ struct genpd_power_state { }; struct genpd_lock_ops; +struct dev_pm_opp; struct generic_pm_domain { + struct device dev; struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ struct list_head master_links; /* Links with PM domain as a master */ @@ -67,6 +69,8 @@ struct generic_pm_domain { unsigned int performance_state; /* Aggregated max performance state */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); + unsigned int (*opp_to_performance_state)(struct generic_pm_domain *genpd, + struct dev_pm_opp *opp); int (*set_performance_state)(struct generic_pm_domain *genpd, unsigned int state); struct gpd_dev_ops dev_ops; @@ -139,21 +143,16 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) return to_gpd_data(dev->power.subsys_data->domain_data); } -extern int __pm_genpd_add_device(struct generic_pm_domain *genpd, - struct device *dev, - struct gpd_timing_data *td); - -extern int pm_genpd_remove_device(struct generic_pm_domain *genpd, - struct device *dev); -extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, - struct generic_pm_domain *new_subdomain); -extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, - struct generic_pm_domain *target); -extern int pm_genpd_init(struct generic_pm_domain *genpd, - struct dev_power_governor *gov, bool is_off); -extern int pm_genpd_remove(struct generic_pm_domain *genpd); -extern int dev_pm_genpd_set_performance_state(struct device *dev, - unsigned int state); +int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev); +int pm_genpd_remove_device(struct device *dev); +int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, + struct generic_pm_domain *new_subdomain); +int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, + struct generic_pm_domain *target); +int pm_genpd_init(struct generic_pm_domain *genpd, + struct dev_power_governor *gov, bool is_off); +int pm_genpd_remove(struct generic_pm_domain *genpd); +int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -163,14 +162,12 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) { return ERR_PTR(-ENOSYS); } -static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd, - struct device *dev, - struct gpd_timing_data *td) +static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, + struct device *dev) { return -ENOSYS; } -static inline int pm_genpd_remove_device(struct generic_pm_domain *genpd, - struct device *dev) +static inline int pm_genpd_remove_device(struct device *dev) { return -ENOSYS; } @@ -204,15 +201,9 @@ static inline int dev_pm_genpd_set_performance_state(struct device *dev, #define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif -static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, - struct device *dev) -{ - return __pm_genpd_add_device(genpd, dev, NULL); -} - #ifdef CONFIG_PM_GENERIC_DOMAINS_SLEEP -extern void pm_genpd_syscore_poweroff(struct device *dev); -extern void pm_genpd_syscore_poweron(struct device *dev); +void pm_genpd_syscore_poweroff(struct device *dev); +void pm_genpd_syscore_poweron(struct device *dev); #else static inline void pm_genpd_syscore_poweroff(struct device *dev) {} static inline void pm_genpd_syscore_poweron(struct device *dev) {} @@ -236,13 +227,14 @@ int of_genpd_add_provider_simple(struct device_node *np, int of_genpd_add_provider_onecell(struct device_node *np, struct genpd_onecell_data *data); void of_genpd_del_provider(struct device_node *np); -extern int of_genpd_add_device(struct of_phandle_args *args, - struct device *dev); -extern int of_genpd_add_subdomain(struct of_phandle_args *parent, - struct of_phandle_args *new_subdomain); -extern struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); -extern int of_genpd_parse_idle_states(struct device_node *dn, - struct genpd_power_state **states, int *n); +int of_genpd_add_device(struct of_phandle_args *args, struct device *dev); +int of_genpd_add_subdomain(struct of_phandle_args *parent, + struct of_phandle_args *new_subdomain); +struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); +int of_genpd_parse_idle_states(struct device_node *dn, + struct genpd_power_state **states, int *n); +unsigned int of_genpd_opp_to_performance_state(struct device *dev, + struct device_node *opp_node); int genpd_dev_pm_attach(struct device *dev); #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */ @@ -278,11 +270,18 @@ static inline int of_genpd_parse_idle_states(struct device_node *dn, return -ENODEV; } -static inline int genpd_dev_pm_attach(struct device *dev) +static inline unsigned int +of_genpd_opp_to_performance_state(struct device *dev, + struct device_node *opp_node) { return -ENODEV; } +static inline int genpd_dev_pm_attach(struct device *dev) +{ + return 0; +} + static inline struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) { @@ -291,13 +290,13 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ #ifdef CONFIG_PM -extern int dev_pm_domain_attach(struct device *dev, bool power_on); -extern void dev_pm_domain_detach(struct device *dev, bool power_off); -extern void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd); +int dev_pm_domain_attach(struct device *dev, bool power_on); +void dev_pm_domain_detach(struct device *dev, bool power_off); +void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd); #else static inline int dev_pm_domain_attach(struct device *dev, bool power_on) { - return -ENODEV; + return 0; } static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {} static inline void dev_pm_domain_set(struct device *dev, diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 6c2d2e88f0666b..099b31960dec24 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -125,8 +125,6 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name); void dev_pm_opp_put_clkname(struct opp_table *opp_table); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); -struct opp_table *dev_pm_opp_register_get_pstate_helper(struct device *dev, int (*get_pstate)(struct device *dev, unsigned long rate)); -void dev_pm_opp_unregister_get_pstate_helper(struct opp_table *opp_table); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); @@ -247,14 +245,6 @@ static inline struct opp_table *dev_pm_opp_register_set_opp_helper(struct device static inline void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) {} -static inline struct opp_table *dev_pm_opp_register_get_pstate_helper(struct device *dev, - int (*get_pstate)(struct device *dev, unsigned long rate)) -{ - return ERR_PTR(-ENOTSUPP); -} - -static inline void dev_pm_opp_unregister_get_pstate_helper(struct opp_table *opp_table) {} - static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) { return ERR_PTR(-ENOTSUPP); @@ -303,17 +293,25 @@ static inline void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) int dev_pm_opp_of_add_table(struct device *dev); +int dev_pm_opp_of_add_table_indexed(struct device *dev, int index); void dev_pm_opp_of_remove_table(struct device *dev); int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask); void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask); int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev); +struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev, struct device_node *np); +struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp); #else static inline int dev_pm_opp_of_add_table(struct device *dev) { return -ENOTSUPP; } +static inline int dev_pm_opp_of_add_table_indexed(struct device *dev, int index) +{ + return -ENOTSUPP; +} + static inline void dev_pm_opp_of_remove_table(struct device *dev) { } @@ -336,6 +334,15 @@ static inline struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device { return NULL; } + +static inline struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev, struct device_node *np) +{ + return NULL; +} +static inline struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp) +{ + return NULL; +} #endif #endif /* __LINUX_OPP_H__ */ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index f0fc4700b6ff53..db5dbbf7a48d45 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -56,8 +56,7 @@ extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); extern void pm_runtime_clean_up_links(struct device *dev); -extern void pm_runtime_get_suppliers(struct device *dev); -extern void pm_runtime_put_suppliers(struct device *dev); +extern void pm_runtime_resume_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device *dev); @@ -173,8 +172,7 @@ static inline unsigned long pm_runtime_autosuspend_expiration( static inline void pm_runtime_set_memalloc_noio(struct device *dev, bool enable){} static inline void pm_runtime_clean_up_links(struct device *dev) {} -static inline void pm_runtime_get_suppliers(struct device *dev) {} -static inline void pm_runtime_put_suppliers(struct device *dev) {} +static inline void pm_runtime_resume_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device *dev) {} diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 261471f407a55c..f41d2fb09f87bc 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -43,7 +43,7 @@ struct srcu_struct { void srcu_drive_gp(struct work_struct *wp); -#define __SRCU_STRUCT_INIT(name) \ +#define __SRCU_STRUCT_INIT(name, __ignored) \ { \ .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \ .srcu_cb_tail = &name.srcu_cb_head, \ @@ -56,9 +56,9 @@ void srcu_drive_gp(struct work_struct *wp); * Tree SRCU, which needs some per-CPU data. */ #define DEFINE_SRCU(name) \ - struct srcu_struct name = __SRCU_STRUCT_INIT(name) + struct srcu_struct name = __SRCU_STRUCT_INIT(name, name) #define DEFINE_STATIC_SRCU(name) \ - static struct srcu_struct name = __SRCU_STRUCT_INIT(name) + static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name) void synchronize_srcu(struct srcu_struct *sp); diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 4eda108abee065..745d4ca4dd503c 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -104,9 +104,9 @@ struct srcu_struct { #define SRCU_STATE_SCAN1 1 #define SRCU_STATE_SCAN2 2 -#define __SRCU_STRUCT_INIT(name) \ +#define __SRCU_STRUCT_INIT(name, pcpu_name) \ { \ - .sda = &name##_srcu_data, \ + .sda = &pcpu_name, \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq_needed = 0 - 1, \ __SRCU_DEP_MAP_INIT(name) \ @@ -133,7 +133,7 @@ struct srcu_struct { */ #define __DEFINE_SRCU(name, is_static) \ static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\ - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_data) #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 5454cc639a8d16..9c85c782238308 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -287,6 +287,8 @@ static int create_image(int platform_mode) local_irq_disable(); + system_state = SYSTEM_SUSPEND; + error = syscore_suspend(); if (error) { pr_err("Some system devices failed to power down, aborting hibernation\n"); @@ -317,6 +319,7 @@ static int create_image(int platform_mode) syscore_resume(); Enable_irqs: + system_state = SYSTEM_RUNNING; local_irq_enable(); Enable_cpus: @@ -445,6 +448,7 @@ static int resume_target_kernel(bool platform_mode) goto Enable_cpus; local_irq_disable(); + system_state = SYSTEM_SUSPEND; error = syscore_suspend(); if (error) @@ -478,6 +482,7 @@ static int resume_target_kernel(bool platform_mode) syscore_resume(); Enable_irqs: + system_state = SYSTEM_RUNNING; local_irq_enable(); Enable_cpus: @@ -563,6 +568,7 @@ int hibernation_platform_enter(void) goto Enable_cpus; local_irq_disable(); + system_state = SYSTEM_SUSPEND; syscore_suspend(); if (pm_wakeup_pending()) { error = -EAGAIN; @@ -575,6 +581,7 @@ int hibernation_platform_enter(void) Power_up: syscore_resume(); + system_state = SYSTEM_RUNNING; local_irq_enable(); Enable_cpus: diff --git a/kernel/power/qos.c b/kernel/power/qos.c index fa39092b7aea2c..86d72ffb811b64 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -184,7 +184,6 @@ static inline void pm_qos_set_value(struct pm_qos_constraints *c, s32 value) c->target_value = value; } -static inline int pm_qos_get_value(struct pm_qos_constraints *c); static int pm_qos_dbg_show_requests(struct seq_file *s, void *unused) { struct pm_qos_object *qos = (struct pm_qos_object *)s->private; diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4c10be0f4843a4..87331565e5050a 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -57,10 +58,10 @@ EXPORT_SYMBOL_GPL(pm_suspend_global_flags); static const struct platform_suspend_ops *suspend_ops; static const struct platform_s2idle_ops *s2idle_ops; -static DECLARE_WAIT_QUEUE_HEAD(s2idle_wait_head); +static DECLARE_SWAIT_QUEUE_HEAD(s2idle_wait_head); enum s2idle_states __read_mostly s2idle_state; -static DEFINE_SPINLOCK(s2idle_lock); +static DEFINE_RAW_SPINLOCK(s2idle_lock); void s2idle_set_ops(const struct platform_s2idle_ops *ops) { @@ -78,12 +79,12 @@ static void s2idle_enter(void) { trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_TO_IDLE, true); - spin_lock_irq(&s2idle_lock); + raw_spin_lock_irq(&s2idle_lock); if (pm_wakeup_pending()) goto out; s2idle_state = S2IDLE_STATE_ENTER; - spin_unlock_irq(&s2idle_lock); + raw_spin_unlock_irq(&s2idle_lock); get_online_cpus(); cpuidle_resume(); @@ -91,17 +92,17 @@ static void s2idle_enter(void) /* Push all the CPUs into the idle loop. */ wake_up_all_idle_cpus(); /* Make the current CPU wait so it can enter the idle loop too. */ - wait_event(s2idle_wait_head, - s2idle_state == S2IDLE_STATE_WAKE); + swait_event(s2idle_wait_head, + s2idle_state == S2IDLE_STATE_WAKE); cpuidle_pause(); put_online_cpus(); - spin_lock_irq(&s2idle_lock); + raw_spin_lock_irq(&s2idle_lock); out: s2idle_state = S2IDLE_STATE_NONE; - spin_unlock_irq(&s2idle_lock); + raw_spin_unlock_irq(&s2idle_lock); trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_TO_IDLE, false); } @@ -156,12 +157,12 @@ void s2idle_wake(void) { unsigned long flags; - spin_lock_irqsave(&s2idle_lock, flags); + raw_spin_lock_irqsave(&s2idle_lock, flags); if (s2idle_state > S2IDLE_STATE_NONE) { s2idle_state = S2IDLE_STATE_WAKE; - wake_up(&s2idle_wait_head); + swake_up(&s2idle_wait_head); } - spin_unlock_irqrestore(&s2idle_lock, flags); + raw_spin_unlock_irqrestore(&s2idle_lock, flags); } EXPORT_SYMBOL_GPL(s2idle_wake); @@ -428,6 +429,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); + system_state = SYSTEM_SUSPEND; + error = syscore_suspend(); if (!error) { *wakeup = pm_wakeup_pending(); @@ -443,6 +446,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) syscore_resume(); } + system_state = SYSTEM_RUNNING; + arch_suspend_enable_irqs(); BUG_ON(irqs_disabled()); diff --git a/kernel/power/user.c b/kernel/power/user.c index 75c959de4b29cb..abd22555027135 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -186,6 +186,11 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, res = PAGE_SIZE - pg_offp; } + if (!data_of(data->handle)) { + res = -EINVAL; + goto unlock; + } + res = simple_write_to_buffer(data_of(data->handle), res, &pg_offp, buf, count); if (res > 0) diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index dfba59be190b6a..4210152e56f0f4 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -188,6 +188,7 @@ static struct wakelock *wakelock_lookup_add(const char *name, size_t len, return ERR_PTR(-ENOMEM); } wl->ws.name = wl->name; + wl->ws.last_time = ktime_get(); wakeup_source_add(&wl->ws); rb_link_node(&wl->node, parent, node); rb_insert_color(&wl->node, &wakelocks_tree); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 28592b62b1d5cb..3cde46483f0aa5 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -51,7 +51,7 @@ struct sugov_cpu { bool iowait_boost_pending; unsigned int iowait_boost; unsigned int iowait_boost_max; - u64 last_update; + u64 last_update; /* The fields below are only needed when sharing a policy: */ unsigned long util_cfs; @@ -89,46 +89,52 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) * schedule the kthread. */ if (sg_policy->policy->fast_switch_enabled && - !cpufreq_can_do_remote_dvfs(sg_policy->policy)) + !cpufreq_this_cpu_can_update(sg_policy->policy)) return false; - if (sg_policy->work_in_progress) - return false; - - if (unlikely(sg_policy->need_freq_update)) { - sg_policy->need_freq_update = false; - /* - * This happens when limits change, so forget the previous - * next_freq value and force an update. - */ - sg_policy->next_freq = UINT_MAX; + if (unlikely(sg_policy->need_freq_update)) return true; - } delta_ns = time - sg_policy->last_freq_update_time; return delta_ns >= sg_policy->freq_update_delay_ns; } -static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, - unsigned int next_freq) +static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, + unsigned int next_freq) { - struct cpufreq_policy *policy = sg_policy->policy; - if (sg_policy->next_freq == next_freq) - return; + return false; sg_policy->next_freq = next_freq; sg_policy->last_freq_update_time = time; - if (policy->fast_switch_enabled) { - next_freq = cpufreq_driver_fast_switch(policy, next_freq); - if (!next_freq) - return; + return true; +} - policy->cur = next_freq; - trace_cpu_frequency(next_freq, smp_processor_id()); - } else { +static void sugov_fast_switch(struct sugov_policy *sg_policy, u64 time, + unsigned int next_freq) +{ + struct cpufreq_policy *policy = sg_policy->policy; + + if (!sugov_update_next_freq(sg_policy, time, next_freq)) + return; + + next_freq = cpufreq_driver_fast_switch(policy, next_freq); + if (!next_freq) + return; + + policy->cur = next_freq; + trace_cpu_frequency(next_freq, smp_processor_id()); +} + +static void sugov_deferred_update(struct sugov_policy *sg_policy, u64 time, + unsigned int next_freq) +{ + if (!sugov_update_next_freq(sg_policy, time, next_freq)) + return; + + if (!sg_policy->work_in_progress) { sg_policy->work_in_progress = true; irq_work_queue(&sg_policy->irq_work); } @@ -165,8 +171,10 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, freq = (freq + (freq >> 2)) * util / max; - if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX) + if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update) return sg_policy->next_freq; + + sg_policy->need_freq_update = false; sg_policy->cached_raw_freq = freq; return cpufreq_driver_resolve_freq(policy, freq); } @@ -200,43 +208,120 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu) return min(sg_cpu->max, (sg_cpu->util_dl + sg_cpu->util_cfs)); } -static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags) +/** + * sugov_iowait_reset() - Reset the IO boost status of a CPU. + * @sg_cpu: the sugov data for the CPU to boost + * @time: the update time from the caller + * @set_iowait_boost: true if an IO boost has been requested + * + * The IO wait boost of a task is disabled after a tick since the last update + * of a CPU. If a new IO wait boost is requested after more then a tick, then + * we enable the boost starting from the minimum frequency, which improves + * energy efficiency by ignoring sporadic wakeups from IO. + */ +static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, + bool set_iowait_boost) { - if (flags & SCHED_CPUFREQ_IOWAIT) { - if (sg_cpu->iowait_boost_pending) - return; + s64 delta_ns = time - sg_cpu->last_update; - sg_cpu->iowait_boost_pending = true; + /* Reset boost only if a tick has elapsed since last request */ + if (delta_ns <= TICK_NSEC) + return false; - if (sg_cpu->iowait_boost) { - sg_cpu->iowait_boost <<= 1; - if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max) - sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; - } else { - sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min; - } - } else if (sg_cpu->iowait_boost) { - s64 delta_ns = time - sg_cpu->last_update; + sg_cpu->iowait_boost = set_iowait_boost + ? sg_cpu->sg_policy->policy->min : 0; + sg_cpu->iowait_boost_pending = set_iowait_boost; - /* Clear iowait_boost if the CPU apprears to have been idle. */ - if (delta_ns > TICK_NSEC) { - sg_cpu->iowait_boost = 0; - sg_cpu->iowait_boost_pending = false; - } + return true; +} + +/** + * sugov_iowait_boost() - Updates the IO boost status of a CPU. + * @sg_cpu: the sugov data for the CPU to boost + * @time: the update time from the caller + * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait + * + * Each time a task wakes up after an IO operation, the CPU utilization can be + * boosted to a certain utilization which doubles at each "frequent and + * successive" wakeup from IO, ranging from the utilization of the minimum + * OPP to the utilization of the maximum OPP. + * To keep doubling, an IO boost has to be requested at least once per tick, + * otherwise we restart from the utilization of the minimum OPP. + */ +static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, + unsigned int flags) +{ + bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT; + + /* Reset boost if the CPU appears to have been idle enough */ + if (sg_cpu->iowait_boost && + sugov_iowait_reset(sg_cpu, time, set_iowait_boost)) + return; + + /* Boost only tasks waking up after IO */ + if (!set_iowait_boost) + return; + + /* Ensure boost doubles only one time at each request */ + if (sg_cpu->iowait_boost_pending) + return; + sg_cpu->iowait_boost_pending = true; + + /* Double the boost at each request */ + if (sg_cpu->iowait_boost) { + sg_cpu->iowait_boost <<= 1; + if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max) + sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; + return; } + + /* First wakeup after IO: start with minimum boost */ + sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min; } -static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, - unsigned long *max) +/** + * sugov_iowait_apply() - Apply the IO boost to a CPU. + * @sg_cpu: the sugov data for the cpu to boost + * @time: the update time from the caller + * @util: the utilization to (eventually) boost + * @max: the maximum value the utilization can be boosted to + * + * A CPU running a task which woken up after an IO operation can have its + * utilization boosted to speed up the completion of those IO operations. + * The IO boost value is increased each time a task wakes up from IO, in + * sugov_iowait_apply(), and it's instead decreased by this function, + * each time an increase has not been requested (!iowait_boost_pending). + * + * A CPU which also appears to have been idle for at least one tick has also + * its IO boost utilization reset. + * + * This mechanism is designed to boost high frequently IO waiting tasks, while + * being more conservative on tasks which does sporadic IO operations. + */ +static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, + unsigned long *util, unsigned long *max) { unsigned int boost_util, boost_max; + /* No boost currently required */ if (!sg_cpu->iowait_boost) return; + /* Reset boost if the CPU appears to have been idle enough */ + if (sugov_iowait_reset(sg_cpu, time, false)) + return; + + /* + * An IO waiting task has just woken up: + * allow to further double the boost value + */ if (sg_cpu->iowait_boost_pending) { sg_cpu->iowait_boost_pending = false; } else { + /* + * Otherwise: reduce the boost value and disable it when we + * reach the minimum. + */ sg_cpu->iowait_boost >>= 1; if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) { sg_cpu->iowait_boost = 0; @@ -244,9 +329,12 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, } } + /* + * Apply the current boost value: a CPU is boosted only if its current + * utilization is smaller then the current IO boost level. + */ boost_util = sg_cpu->iowait_boost; boost_max = sg_cpu->iowait_boost_max; - if (*util * boost_max < *max * boost_util) { *util = boost_util; *max = boost_max; @@ -285,7 +373,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, unsigned int next_f; bool busy; - sugov_set_iowait_boost(sg_cpu, time, flags); + sugov_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; ignore_dl_rate_limit(sg_cpu, sg_policy); @@ -298,21 +386,31 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, sugov_get_util(sg_cpu); max = sg_cpu->max; util = sugov_aggregate_util(sg_cpu); - sugov_iowait_boost(sg_cpu, &util, &max); + sugov_iowait_apply(sg_cpu, time, &util, &max); next_f = get_next_freq(sg_policy, util, max); /* * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. */ - if (busy && next_f < sg_policy->next_freq && - sg_policy->next_freq != UINT_MAX) { + if (busy && next_f < sg_policy->next_freq) { next_f = sg_policy->next_freq; /* Reset cached freq as next_freq has changed */ sg_policy->cached_raw_freq = 0; } - sugov_update_commit(sg_policy, time, next_f); + /* + * This code runs under rq->lock for the target CPU, so it won't run + * concurrently on two different CPUs for the same target and it is not + * necessary to acquire the lock in the fast switch case. + */ + if (sg_policy->policy->fast_switch_enabled) { + sugov_fast_switch(sg_policy, time, next_f); + } else { + raw_spin_lock(&sg_policy->update_lock); + sugov_deferred_update(sg_policy, time, next_f); + raw_spin_unlock(&sg_policy->update_lock); + } } static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) @@ -325,28 +423,12 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) for_each_cpu(j, policy->cpus) { struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); unsigned long j_util, j_max; - s64 delta_ns; sugov_get_util(j_sg_cpu); - - /* - * If the CFS CPU utilization was last updated before the - * previous frequency update and the time elapsed between the - * last update of the CPU utilization and the last frequency - * update is long enough, reset iowait_boost and util_cfs, as - * they are now probably stale. However, still consider the - * CPU contribution if it has some DEADLINE utilization - * (util_dl). - */ - delta_ns = time - j_sg_cpu->last_update; - if (delta_ns > TICK_NSEC) { - j_sg_cpu->iowait_boost = 0; - j_sg_cpu->iowait_boost_pending = false; - } - j_max = j_sg_cpu->max; j_util = sugov_aggregate_util(j_sg_cpu); - sugov_iowait_boost(j_sg_cpu, &j_util, &j_max); + sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max); + if (j_util * max > j_max * util) { util = j_util; max = j_max; @@ -365,14 +447,18 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) raw_spin_lock(&sg_policy->update_lock); - sugov_set_iowait_boost(sg_cpu, time, flags); + sugov_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; ignore_dl_rate_limit(sg_cpu, sg_policy); if (sugov_should_update_freq(sg_policy, time)) { next_f = sugov_next_freq_shared(sg_cpu, time); - sugov_update_commit(sg_policy, time, next_f); + + if (sg_policy->policy->fast_switch_enabled) + sugov_fast_switch(sg_policy, time, next_f); + else + sugov_deferred_update(sg_policy, time, next_f); } raw_spin_unlock(&sg_policy->update_lock); @@ -381,13 +467,27 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) static void sugov_work(struct kthread_work *work) { struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work); + unsigned int freq; + unsigned long flags; + + /* + * Hold sg_policy->update_lock shortly to handle the case where: + * incase sg_policy->next_freq is read here, and then updated by + * sugov_deferred_update() just before work_in_progress is set to false + * here, we may miss queueing the new update. + * + * Note: If a work was queued after the update_lock is released, + * sugov_work() will just be called again by kthread_work code; and the + * request will be proceed before the sugov thread sleeps. + */ + raw_spin_lock_irqsave(&sg_policy->update_lock, flags); + freq = sg_policy->next_freq; + sg_policy->work_in_progress = false; + raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags); mutex_lock(&sg_policy->work_lock); - __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq, - CPUFREQ_RELATION_L); + __cpufreq_driver_target(sg_policy->policy, freq, CPUFREQ_RELATION_L); mutex_unlock(&sg_policy->work_lock); - - sg_policy->work_in_progress = false; } static void sugov_irq_work(struct irq_work *irq_work) @@ -510,11 +610,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) } sg_policy->thread = thread; - - /* Kthread is bound to all CPUs by default */ - if (!policy->dvfs_possible_from_any_cpu) - kthread_bind_mask(thread, policy->related_cpus); - + kthread_bind_mask(thread, policy->related_cpus); init_irq_work(&sg_policy->irq_work, sugov_irq_work); mutex_init(&sg_policy->work_lock); @@ -657,7 +753,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; sg_policy->last_freq_update_time = 0; - sg_policy->next_freq = UINT_MAX; + sg_policy->next_freq = 0; sg_policy->work_in_progress = false; sg_policy->need_freq_update = false; sg_policy->cached_raw_freq = 0; diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 78e598334007b6..b7005dd21ec16c 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -491,6 +491,7 @@ void tick_freeze(void) if (tick_freeze_depth == num_online_cpus()) { trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), true); + system_state = SYSTEM_SUSPEND; timekeeping_suspend(); } else { tick_suspend_local(); @@ -514,6 +515,7 @@ void tick_unfreeze(void) if (tick_freeze_depth == num_online_cpus()) { timekeeping_resume(); + system_state = SYSTEM_RUNNING; trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), false); } else { diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py index abb4c38f029b70..8ee626c0f6a5d8 100755 --- a/tools/power/pm-graph/bootgraph.py +++ b/tools/power/pm-graph/bootgraph.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python2 # # Tool for analyzing boot timing # Copyright (c) 2013, Intel Corporation. diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8 index 18baaf6300c936..070be2cf7f7439 100644 --- a/tools/power/pm-graph/sleepgraph.8 +++ b/tools/power/pm-graph/sleepgraph.8 @@ -168,6 +168,7 @@ Create a summary page of all tests in \fIindir\fR. Creates summary.html in the current folder. The output page is a table of tests with suspend and resume values sorted by suspend mode, host, and kernel. Includes test averages by mode and links to the test html files. +Use -genhtml to include tests with missing html. .TP \fB-modes\fR List available suspend modes. @@ -179,6 +180,9 @@ with any options you intend to use to see if they will work. \fB-fpdt\fR Print out the contents of the ACPI Firmware Performance Data Table. .TP +\fB-battery\fR +Print out battery status and current charge. +.TP \fB-sysinfo\fR Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode. .TP diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 266409fb27ae6c..0c760478f7d7dd 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python2 # # Tool for analyzing suspend/resume timing # Copyright (c) 2013, Intel Corporation. @@ -69,7 +69,7 @@ # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.0' + version = '5.1' ansi = False rs = 0 display = 0 @@ -240,7 +240,7 @@ class SystemValues: kprobes = dict() timeformat = '%.3f' cmdline = '%s %s' % \ - (os.path.basename(sys.argv[0]), string.join(sys.argv[1:], ' ')) + (os.path.basename(sys.argv[0]), ' '.join(sys.argv[1:])) def __init__(self): self.archargs = 'args_'+platform.machine() self.hostname = platform.node() @@ -917,12 +917,18 @@ def __init__(self, num): self.devicegroups.append([phase]) self.errorinfo = {'suspend':[],'resume':[]} def extractErrorInfo(self): + elist = { + 'HWERROR' : '.*\[ *Hardware Error *\].*', + 'FWBUG' : '.*\[ *Firmware Bug *\].*', + 'BUG' : '.*BUG.*', + 'ERROR' : '.*ERROR.*', + 'WARNING' : '.*WARNING.*', + 'IRQ' : '.*genirq: .*', + 'TASKFAIL': '.*Freezing of tasks failed.*', + } lf = sysvals.openlog(sysvals.dmesgfile, 'r') i = 0 list = [] - # sl = start line, et = error time, el = error line - type = 'ERROR' - sl = et = el = -1 for line in lf: i += 1 m = re.match('[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) @@ -931,43 +937,13 @@ def extractErrorInfo(self): t = float(m.group('ktime')) if t < self.start or t > self.end: continue - if t < self.tSuspended: - dir = 'suspend' - else: - dir = 'resume' + dir = 'suspend' if t < self.tSuspended else 'resume' msg = m.group('msg') - if re.match('-*\[ *cut here *\]-*', msg): - type = 'WARNING' - sl = i - elif re.match('genirq: .*', msg): - type = 'IRQ' - sl = i - elif re.match('BUG: .*', msg) or re.match('kernel BUG .*', msg): - type = 'BUG' - sl = i - elif re.match('-*\[ *end trace .*\]-*', msg) or \ - re.match('R13: .*', msg): - if et >= 0 and sl >= 0: - list.append((type, dir, et, sl, i)) - self.kerror = True - sl = et = el = -1 - type = 'ERROR' - elif 'Call Trace:' in msg: - if el >= 0 and et >= 0: - list.append((type, dir, et, el, el)) + for err in elist: + if re.match(elist[err], msg): + list.append((err, dir, t, i, i)) self.kerror = True - et, el = t, i - if sl < 0 or type == 'BUG': - slval = i - if sl >= 0: - slval = sl - list.append((type, dir, et, slval, i)) - self.kerror = True - sl = et = el = -1 - type = 'ERROR' - if el >= 0 and et >= 0: - list.append((type, dir, et, el, el)) - self.kerror = True + break for e in list: type, dir, t, idx1, idx2 = e sysvals.vprint('kernel %s found in %s at %f' % (type, dir, t)) @@ -2331,12 +2307,14 @@ def parseStamp(self, data, sv): sv.suspendmode = data.stamp['mode'] if sv.suspendmode == 'command' and sv.ftracefile != '': modes = ['on', 'freeze', 'standby', 'mem', 'disk'] - out = Popen(['grep', 'machine_suspend', sv.ftracefile], - stderr=PIPE, stdout=PIPE).stdout.read() - m = re.match('.* machine_suspend\[(?P.*)\]', out) - if m and m.group('mode') in ['1', '2', '3', '4']: - sv.suspendmode = modes[int(m.group('mode'))] - data.stamp['mode'] = sv.suspendmode + fp = sysvals.openlog(sv.ftracefile, 'r') + for line in fp: + m = re.match('.* machine_suspend\[(?P.*)\]', line) + if m and m.group('mode') in ['1', '2', '3', '4']: + sv.suspendmode = modes[int(m.group('mode'))] + data.stamp['mode'] = sv.suspendmode + break + fp.close() m = re.match(self.cmdlinefmt, self.cmdline) if m: sv.cmdline = m.group('cmd') @@ -2413,7 +2391,7 @@ def stop(self): # markers, and/or kprobes required for primary parsing. def doesTraceLogHaveTraceEvents(): kpcheck = ['_cal: (', '_cpu_down()'] - techeck = sysvals.traceevents[:] + techeck = ['suspend_resume'] tmcheck = ['SUSPEND START', 'RESUME COMPLETE'] sysvals.usekprobes = False fp = sysvals.openlog(sysvals.ftracefile, 'r') @@ -2808,7 +2786,7 @@ def parseTraceLog(live=False): # -- phase changes -- # start of kernel suspend if(re.match('suspend_enter\[.*', t.name)): - if(isbegin): + if(isbegin and data.start == data.tKernSus): data.dmesg[phase]['start'] = t.time data.tKernSus = t.time continue @@ -3072,13 +3050,20 @@ def parseTraceLog(live=False): sysvals.vprint('Callgraph found for task %d: %.3fms, %s' % (cg.pid, (cg.end - cg.start)*1000, name)) cg.newActionFromFunction(data) if sysvals.suspendmode == 'command': - return testdata + return (testdata, '') # fill in any missing phases + error = [] for data in testdata: + tn = '' if len(testdata) == 1 else ('%d' % (data.testnumber + 1)) + terr = '' lp = data.phases[0] for p in data.phases: if(data.dmesg[p]['start'] < 0 and data.dmesg[p]['end'] < 0): + if not terr: + print 'TEST%s FAILED: %s failed in %s phase' % (tn, sysvals.suspendmode, lp) + terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, lp) + error.append(terr) sysvals.vprint('WARNING: phase "%s" is missing!' % p) if(data.dmesg[p]['start'] < 0): data.dmesg[p]['start'] = data.dmesg[lp]['end'] @@ -3106,7 +3091,7 @@ def parseTraceLog(live=False): for j in range(i + 1, tc): testdata[j].mergeOverlapDevices(devlist) testdata[0].stitchTouchingThreads(testdata[1:]) - return testdata + return (testdata, ', '.join(error)) # Function: loadKernelLog # Description: @@ -3173,7 +3158,7 @@ def loadKernelLog(): if data: testruns.append(data) if len(testruns) < 1: - doError(' dmesg log has no suspend/resume data: %s' \ + print('ERROR: dmesg log has no suspend/resume data: %s' \ % sysvals.dmesgfile) # fix lines with same timestamp/function with the call and return swapped @@ -3521,68 +3506,144 @@ def createHTMLSummarySimple(testruns, htmlfile, folder): .summary {border:1px solid;}\n\ th {border: 1px solid black;background:#222;color:white;}\n\ td {font: 16px "Times New Roman";text-align: center;}\n\ - tr.alt td {background:#ddd;}\n\ - tr.avg td {background:#aaa;}\n\ + tr.head td {border: 1px solid black;background:#aaa;}\n\ + tr.alt {background-color:#ddd;}\n\ + tr.notice {color:red;}\n\ + .minval {background-color:#BBFFBB;}\n\ + .medval {background-color:#BBBBFF;}\n\ + .maxval {background-color:#FFBBBB;}\n\ + .head a {color:#000;text-decoration: none;}\n\ \n\n\n' + # extract the test data into list + list = dict() + tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []] + iMin, iMed, iMax = [0, 0], [0, 0], [0, 0] + num = 0 + lastmode = '' + cnt = {'pass':0, 'fail':0, 'hang':0} + for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])): + mode = data['mode'] + if mode not in list: + list[mode] = {'data': [], 'avg': [0,0], 'min': [0,0], 'max': [0,0], 'med': [0,0]} + if lastmode and lastmode != mode and num > 0: + for i in range(2): + s = sorted(tMed[i]) + list[lastmode]['med'][i] = s[int(len(s)/2)] + iMed[i] = tMed[i].index(list[lastmode]['med'][i]) + list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num] + list[lastmode]['min'] = tMin + list[lastmode]['max'] = tMax + list[lastmode]['idx'] = (iMin, iMed, iMax) + tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []] + iMin, iMed, iMax = [0, 0], [0, 0], [0, 0] + num = 0 + tVal = [float(data['suspend']), float(data['resume'])] + list[mode]['data'].append([data['host'], data['kernel'], + data['time'], tVal[0], tVal[1], data['url'], data['result'], + data['issues']]) + idx = len(list[mode]['data']) - 1 + if data['result'] == 'pass': + cnt['pass'] += 1 + for i in range(2): + tMed[i].append(tVal[i]) + tAvg[i] += tVal[i] + if tMin[i] == 0 or tVal[i] < tMin[i]: + iMin[i] = idx + tMin[i] = tVal[i] + if tMax[i] == 0 or tVal[i] > tMax[i]: + iMax[i] = idx + tMax[i] = tVal[i] + num += 1 + elif data['result'] == 'hang': + cnt['hang'] += 1 + elif data['result'] == 'fail': + cnt['fail'] += 1 + lastmode = mode + if lastmode and num > 0: + for i in range(2): + s = sorted(tMed[i]) + list[lastmode]['med'][i] = s[int(len(s)/2)] + iMed[i] = tMed[i].index(list[lastmode]['med'][i]) + list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num] + list[lastmode]['min'] = tMin + list[lastmode]['max'] = tMax + list[lastmode]['idx'] = (iMin, iMed, iMax) + # group test header - html += '
%s (%d tests)
\n' % (folder, len(testruns)) + desc = [] + for ilk in sorted(cnt, reverse=True): + if cnt[ilk] > 0: + desc.append('%d %s' % (cnt[ilk], ilk)) + html += '
%s (%d tests: %s)
\n' % (folder, len(testruns), ', '.join(desc)) th = '\t{0}\n' td = '\t{0}\n' + tdh = '\t{0}\n' tdlink = '\thtml\n' # table header html += '\n\n' + th.format('#') +\ th.format('Mode') + th.format('Host') + th.format('Kernel') +\ - th.format('Test Time') + th.format('Suspend') + th.format('Resume') +\ - th.format('Detail') + '\n' - - # test data, 1 row per test - avg = ''+\ - '\n' - sTimeAvg = rTimeAvg = 0.0 - mode = '' - num = 0 - for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])): - if mode != data['mode']: - # test average line - if(num > 0): - sTimeAvg /= (num - 1) - rTimeAvg /= (num - 1) - html += avg.format('%d' % (num - 1), mode, - '%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg) - sTimeAvg = rTimeAvg = 0.0 - mode = data['mode'] - num = 1 - # alternate row color - if num % 2 == 1: - html += '\n' + th.format('Test Time') + th.format('Result') + th.format('Issues') +\ + th.format('Suspend') + th.format('Resume') + th.format('Detail') + '\n' + + # export list into html + head = ''+\ + ''+\ + '\n' + headnone = '\n' + for mode in list: + # header line for each suspend mode + num = 0 + tAvg, tMin, tMax, tMed = list[mode]['avg'], list[mode]['min'],\ + list[mode]['max'], list[mode]['med'] + count = len(list[mode]['data']) + if 'idx' in list[mode]: + iMin, iMed, iMax = list[mode]['idx'] + html += head.format('%d' % count, mode.upper(), + '%.3f' % tAvg[0], '%.3f' % tMin[0], '%.3f' % tMed[0], '%.3f' % tMax[0], + '%.3f' % tAvg[1], '%.3f' % tMin[1], '%.3f' % tMed[1], '%.3f' % tMax[1], + mode.lower() + ) else: - html += '\n' - html += td.format("%d" % num) - num += 1 - # basic info - for item in ['mode', 'host', 'kernel', 'time']: - val = "unknown" - if(item in data): - val = data[item] - html += td.format(val) - # suspend time - sTime = float(data['suspend']) - sTimeAvg += sTime - html += td.format('%.3f ms' % sTime) - # resume time - rTime = float(data['resume']) - rTimeAvg += rTime - html += td.format('%.3f ms' % rTime) - # link to the output html - html += tdlink.format(data['url']) + '\n' - # last test average line - if(num > 0): - sTimeAvg /= (num - 1) - rTimeAvg /= (num - 1) - html += avg.format('%d' % (num - 1), mode, - '%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg) + iMin = iMed = iMax = [-1, -1, -1] + html += headnone.format('%d' % count, mode.upper()) + for d in list[mode]['data']: + # row classes - alternate row color + rcls = ['alt'] if num % 2 == 1 else [] + if d[6] != 'pass': + rcls.append('notice') + html += '\n' if len(rcls) > 0 else '\n' + # figure out if the line has sus or res highlighted + idx = list[mode]['data'].index(d) + tHigh = ['', ''] + for i in range(2): + tag = 's%s' % mode if i == 0 else 'r%s' % mode + if idx == iMin[i]: + tHigh[i] = ' id="%smin" class=minval title="Minimum"' % tag + elif idx == iMax[i]: + tHigh[i] = ' id="%smax" class=maxval title="Maximum"' % tag + elif idx == iMed[i]: + tHigh[i] = ' id="%smed" class=medval title="Median"' % tag + html += td.format("%d" % (list[mode]['data'].index(d) + 1)) # row + html += td.format(mode) # mode + html += td.format(d[0]) # host + html += td.format(d[1]) # kernel + html += td.format(d[2]) # time + html += td.format(d[6]) # result + html += td.format(d[7]) # issues + html += tdh.format('%.3f ms' % d[3], tHigh[0]) if d[3] else td.format('') # suspend + html += tdh.format('%.3f ms' % d[4], tHigh[1]) if d[4] else td.format('') # resume + html += tdlink.format(d[5]) if d[5] else td.format('') # url + html += '\n' + num += 1 # flush the data to file hf = open(htmlfile, 'w') @@ -3607,7 +3668,7 @@ def ordinal(value): # testruns: array of Data objects from parseKernelLog or parseTraceLog # Output: # True if the html file was created, false if it failed -def createHTML(testruns): +def createHTML(testruns, testfail): if len(testruns) < 1: print('ERROR: Not enough test data to build a timeline') return @@ -3641,6 +3702,7 @@ def createHTML(testruns): ''\ ''\ '\n
Average of {0} {1} tests{2}{3}
{0}{1}Suspend Avg={2} '+\ + 'Min={3} '+\ + 'Med={4} '+\ + 'Max={5} '+\ + 'Resume Avg={6} '+\ + 'Min={7} '+\ + 'Med={8} '+\ + 'Max={9}
{0}{1}
{4}Firmware Resume: {2} ms{4}Kernel Resume: {3} ms
\n' + html_fail = '
{0}
\n' # html format variables scaleH = 20 @@ -3708,6 +3770,9 @@ def createHTML(testruns): resume_time, testdesc, stitle, rtitle) devtl.html += thtml + if testfail: + devtl.html += html_fail.format(testfail) + # time scale for potentially multiple datasets t0 = testruns[0].start tMax = testruns[-1].end @@ -4006,6 +4071,7 @@ def addCSS(hf, sv, testcount=1, kerror=False, extra=''): .blue {background:rgba(169,208,245,0.4);}\n\ .time1 {font:22px Arial;border:1px solid;}\n\ .time2 {font:15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\ + .testfail {font:bold 22px Arial;color:red;border:1px dashed;}\n\ td {text-align:center;}\n\ r {color:#500000;font:15px Tahoma;}\n\ n {color:#505050;font:15px Tahoma;}\n\ @@ -4927,6 +4993,25 @@ def dmidecode(mempath, fatal=False): count += 1 return out +def getBattery(): + p = '/sys/class/power_supply' + bat = dict() + for d in os.listdir(p): + type = sysvals.getVal(os.path.join(p, d, 'type')).strip().lower() + if type != 'battery': + continue + for v in ['status', 'energy_now', 'capacity_now']: + bat[v] = sysvals.getVal(os.path.join(p, d, v)).strip().lower() + break + ac = True + if 'status' in bat and 'discharging' in bat['status']: + ac = False + charge = 0 + for v in ['energy_now', 'capacity_now']: + if v in bat and bat[v]: + charge = int(bat[v]) + return (ac, charge) + # Function: getFPDT # Description: # Read the acpi bios tables and pull out FPDT, the firmware data @@ -5202,8 +5287,9 @@ def getArgFloat(name, args, min, max, main=True): def processData(live=False): print('PROCESSING DATA') + error = '' if(sysvals.usetraceevents): - testruns = parseTraceLog(live) + testruns, error = parseTraceLog(live) if sysvals.dmesgfile: for data in testruns: data.extractErrorInfo() @@ -5220,15 +5306,18 @@ def processData(live=False): for data in testruns: data.debugPrint() sys.exit() - + if len(testruns) < 1: + return (testruns, {'error': 'timeline generation failed'}) sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile) - createHTML(testruns) + createHTML(testruns, error) print('DONE') data = testruns[0] stamp = data.stamp stamp['suspend'], stamp['resume'] = data.getTimeValues() if data.fwValid: stamp['fwsuspend'], stamp['fwresume'] = data.fwSuspend, data.fwResume + if error: + stamp['error'] = error return (testruns, stamp) # Function: rerunTest @@ -5268,58 +5357,88 @@ def runTest(n=0): sysvals.sudouser(sysvals.testdir) sysvals.outputResult(stamp, n) -def find_in_html(html, strs, div=False): - for str in strs: - l = len(str) - i = html.find(str) - if i >= 0: +def find_in_html(html, start, end, firstonly=True): + n, out = 0, [] + while n < len(html): + m = re.search(start, html[n:]) + if not m: break - if i < 0: - return '' - if not div: - return re.search(r'[-+]?\d*\.\d+|\d+', html[i+l:i+l+50]).group() - n = html[i+l:].find('') - if n < 0: + i = m.end() + m = re.search(end, html[n+i:]) + if not m: + break + j = m.start() + str = html[n+i:n+i+j] + if end == 'ms': + num = re.search(r'[-+]?\d*\.\d+|\d+', str) + str = num.group() if num else 'NaN' + if firstonly: + return str + out.append(str) + n += i+j + if firstonly: return '' - return html[i+l:i+l+n] + return out # Function: runSummary # Description: # create a summary of tests in a sub-directory -def runSummary(subdir, local=True): +def runSummary(subdir, local=True, genhtml=False): inpath = os.path.abspath(subdir) outpath = inpath if local: outpath = os.path.abspath('.') print('Generating a summary of folder "%s"' % inpath) + if genhtml: + for dirname, dirnames, filenames in os.walk(subdir): + sysvals.dmesgfile = sysvals.ftracefile = sysvals.htmlfile = '' + for filename in filenames: + if(re.match('.*_dmesg.txt', filename)): + sysvals.dmesgfile = os.path.join(dirname, filename) + elif(re.match('.*_ftrace.txt', filename)): + sysvals.ftracefile = os.path.join(dirname, filename) + sysvals.setOutputFile() + if sysvals.ftracefile and sysvals.htmlfile and \ + not os.path.exists(sysvals.htmlfile): + print('FTRACE: %s' % sysvals.ftracefile) + if sysvals.dmesgfile: + print('DMESG : %s' % sysvals.dmesgfile) + rerunTest() testruns = [] for dirname, dirnames, filenames in os.walk(subdir): for filename in filenames: if(not re.match('.*.html', filename)): continue file = os.path.join(dirname, filename) - html = open(file, 'r').read(10000) - suspend = find_in_html(html, - ['Kernel Suspend: ', 'Kernel Suspend Time: ']) - resume = find_in_html(html, - ['Kernel Resume: ', 'Kernel Resume Time: ']) - line = find_in_html(html, ['
'], True) + html = open(file, 'r').read() + suspend = find_in_html(html, 'Kernel Suspend', 'ms') + resume = find_in_html(html, 'Kernel Resume', 'ms') + line = find_in_html(html, '
', '
') stmp = line.split() - if not suspend or not resume or len(stmp) < 4: + if not suspend or not resume or len(stmp) != 8: continue + try: + dt = datetime.strptime(' '.join(stmp[3:]), '%B %d %Y, %I:%M:%S %p') + except: + continue + tstr = dt.strftime('%Y/%m/%d %H:%M:%S') + error = find_in_html(html, '') + result = 'fail' if error else 'pass' + ilist = [] + e = find_in_html(html, 'class="err"[\w=":;\.%\- ]*>', '→', False) + for i in list(set(e)): + ilist.append('%sx%d' % (i, e.count(i)) if e.count(i) > 1 else i) data = { + 'mode': stmp[2], 'host': stmp[0], 'kernel': stmp[1], - 'mode': stmp[2], - 'time': string.join(stmp[3:], ' '), + 'time': tstr, + 'result': result, + 'issues': ','.join(ilist), 'suspend': suspend, 'resume': resume, 'url': os.path.relpath(file, outpath), } - if len(stmp) == 7: - data['kernel'] = 'unknown' - data['mode'] = stmp[1] - data['time'] = string.join(stmp[2:], ' ') testruns.append(data) outfile = os.path.join(outpath, 'summary.html') print('Summary file: %s' % outfile) @@ -5609,11 +5728,12 @@ def printHelp(): print(' -modes List available suspend modes') print(' -status Test to see if the system is enabled to run this tool') print(' -fpdt Print out the contents of the ACPI Firmware Performance Data Table') + print(' -battery Print out battery info (if available)') print(' -sysinfo Print out system info extracted from BIOS') print(' -devinfo Print out the pm settings of all devices which support runtime suspend') print(' -flist Print the list of functions currently being captured in ftrace') print(' -flistall Print all functions capable of being captured in ftrace') - print(' -summary directory Create a summary of all test in this dir') + print(' -summary dir Create a summary of tests in this dir [-genhtml builds missing html]') print(' [redo]') print(' -ftrace ftracefile Create HTML output using ftrace input (used with -dmesg)') print(' -dmesg dmesgfile Create HTML output using dmesg (used with -ftrace)') @@ -5623,8 +5743,9 @@ def printHelp(): # ----------------- MAIN -------------------- # exec start (skipped if script is loaded as library) if __name__ == '__main__': + genhtml = False cmd = '' - simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-devinfo', '-status'] + simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-devinfo', '-status', '-battery'] if '-f' in sys.argv: sysvals.cgskip = sysvals.configFile('cgskip.txt') # loop through the command line arguments @@ -5660,6 +5781,8 @@ def printHelp(): sysvals.skiphtml = True elif(arg == '-cgdump'): sysvals.cgdump = True + elif(arg == '-genhtml'): + genhtml = True elif(arg == '-addlogs'): sysvals.dmesglog = sysvals.ftracelog = True elif(arg == '-verbose'): @@ -5856,6 +5979,8 @@ def printHelp(): statusCheck(True) elif(cmd == 'fpdt'): getFPDT(True) + elif(cmd == 'battery'): + print 'AC Connect: %s\nCharge: %d' % getBattery() elif(cmd == 'sysinfo'): sysvals.printSystemInfo(True) elif(cmd == 'devinfo'): @@ -5867,7 +5992,7 @@ def printHelp(): elif(cmd == 'flistall'): sysvals.getFtraceFilterFunctions(False) elif(cmd == 'summary'): - runSummary(sysvals.outdir, True) + runSummary(sysvals.outdir, True, genhtml) sys.exit() # if instructed, re-analyze existing data files @@ -5920,7 +6045,7 @@ def printHelp(): print('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count'])) sysvals.logmsg = '' if not sysvals.skiphtml: - runSummary(sysvals.outdir, False) + runSummary(sysvals.outdir, False, False) sysvals.sudouser(sysvals.outdir) else: if sysvals.outdir: diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py index 29f50d4cfea085..84e2b648e622ff 100755 --- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py +++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py @@ -28,6 +28,7 @@ import os import time import re +import signal import sys import getopt import Gnuplot @@ -78,11 +79,12 @@ def print_help(): print(' Or') print(' ./intel_pstate_tracer.py [--cpu cpus] ---trace_file --name ') print(' To generate trace file, parse and plot, use (sudo required):') - print(' sudo ./intel_pstate_tracer.py [-c cpus] -i -n ') + print(' sudo ./intel_pstate_tracer.py [-c cpus] -i -n -m ') print(' Or') - print(' sudo ./intel_pstate_tracer.py [--cpu cpus] --interval --name ') + print(' sudo ./intel_pstate_tracer.py [--cpu cpus] --interval --name --memory ') print(' Optional argument:') - print(' cpus: comma separated list of CPUs') + print(' cpus: comma separated list of CPUs') + print(' kbytes: Kilo bytes of memory per CPU to allocate to the trace buffer. Default: 10240') print(' Output:') print(' If not already present, creates a "results/test_name" folder in the current working directory with:') print(' cpu.csv - comma seperated values file with trace contents and some additional calculations.') @@ -379,7 +381,7 @@ def clear_trace_file(): f_handle.close() except: print('IO error clearing trace file ') - quit() + sys.exit(2) def enable_trace(): """ Enable trace """ @@ -389,7 +391,7 @@ def enable_trace(): , 'w').write("1") except: print('IO error enabling trace ') - quit() + sys.exit(2) def disable_trace(): """ Disable trace """ @@ -399,17 +401,17 @@ def disable_trace(): , 'w').write("0") except: print('IO error disabling trace ') - quit() + sys.exit(2) def set_trace_buffer_size(): """ Set trace buffer size """ try: - open('/sys/kernel/debug/tracing/buffer_size_kb' - , 'w').write("10240") + with open('/sys/kernel/debug/tracing/buffer_size_kb', 'w') as fp: + fp.write(memory) except: - print('IO error setting trace buffer size ') - quit() + print('IO error setting trace buffer size ') + sys.exit(2) def free_trace_buffer(): """ Free the trace buffer memory """ @@ -418,8 +420,8 @@ def free_trace_buffer(): open('/sys/kernel/debug/tracing/buffer_size_kb' , 'w').write("1") except: - print('IO error setting trace buffer size ') - quit() + print('IO error freeing trace buffer ') + sys.exit(2) def read_trace_data(filename): """ Read and parse trace data """ @@ -431,7 +433,7 @@ def read_trace_data(filename): data = open(filename, 'r').read() except: print('Error opening ', filename) - quit() + sys.exit(2) for line in data.splitlines(): search_obj = \ @@ -489,10 +491,22 @@ def read_trace_data(filename): # Now seperate the main overall csv file into per CPU csv files. split_csv() +def signal_handler(signal, frame): + print(' SIGINT: Forcing cleanup before exit.') + if interval: + disable_trace() + clear_trace_file() + # Free the memory + free_trace_buffer() + sys.exit(0) + +signal.signal(signal.SIGINT, signal_handler) + interval = "" filename = "" cpu_list = "" testname = "" +memory = "10240" graph_data_present = False; valid1 = False @@ -501,7 +515,7 @@ def read_trace_data(filename): cpu_mask = zeros((MAX_CPUS,), dtype=int) try: - opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:",["help","trace_file=","interval=","cpu=","name="]) + opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="]) except getopt.GetoptError: print_help() sys.exit(2) @@ -521,6 +535,8 @@ def read_trace_data(filename): elif opt in ("-n", "--name"): valid2 = True testname = arg + elif opt in ("-m", "--memory"): + memory = arg if not (valid1 and valid2): print_help() @@ -569,6 +585,11 @@ def read_trace_data(filename): read_trace_data(filename) +clear_trace_file() +# Free the memory +if interval: + free_trace_buffer() + if graph_data_present == False: print('No valid data to plot') sys.exit(2) @@ -593,9 +614,4 @@ def read_trace_data(filename): for f in files: fix_ownership(f) -clear_trace_file() -# Free the memory -if interval: - free_trace_buffer() - os.chdir('../../') diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index a9bc914a8fe816..2ab25aa382638c 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -25,4 +25,4 @@ install : turbostat install -d $(DESTDIR)$(PREFIX)/bin install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat install -d $(DESTDIR)$(PREFIX)/share/man/man8 - install turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8 + install -m 644 turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8 diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index ccf2a69365ccbb..ca9ef701762492 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -54,9 +54,12 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 .PP -\fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. +\fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. .PP -\fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. +\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default. Currently the only built-in counters disabled by default are "usec" and "Time_Of_Day_Seconds". +The column name "all" can be used to enable all disabled-by-default built-in counters. +.PP +\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. .PP \fB--Dump\fP displays the raw counter values. .PP @@ -64,6 +67,8 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--interval seconds\fP overrides the default 5.0 second measurement interval. .PP +\fB--num_iterations num\fP number of the measurement iterations. +.PP \fB--out output_file\fP turbostat output is written to the specified output_file. The file is truncated if it already exists, and it is created if it does not exist. .PP @@ -86,6 +91,8 @@ displays the statistics gathered since it was forked. The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system. .SH COLUMN DESCRIPTIONS .nf +\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to collect the counters on all cpus. +\fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected. This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug". On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU. \fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. \fBPackage\fP processor package number -- not present on systems with a single processor package. @@ -262,6 +269,21 @@ CPU PRF_CTRL .fi +.SH INPUT + +For interval-mode, turbostat will immediately end the current interval +when it sees a newline on standard input. +turbostat will then start the next interval. +Control-C will be send a SIGINT to turbostat, +which will immediately abort the program with no further processing. +.SH SIGNALS + +SIGINT will interrupt interval-mode. +The end-of-interval data will be collected and displayed before turbostat exits. + +SIGUSR1 will end current interval, +end-of-interval data will be collected and displayed before turbostat +starts a new interval. .SH NOTES .B "turbostat " diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bd9c6b31a504df..d6cff3070ebde6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -47,9 +48,13 @@ char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; +struct timeval interval_tv = {5, 0}; struct timespec interval_ts = {5, 0}; +struct timespec one_msec = {0, 1000000}; +unsigned int num_iterations; unsigned int debug; unsigned int quiet; +unsigned int shown; unsigned int sums_need_wide_columns; unsigned int rapl_joules; unsigned int summary_only; @@ -58,6 +63,7 @@ unsigned int dump_only; unsigned int do_snb_cstates; unsigned int do_knl_cstates; unsigned int do_slm_cstates; +unsigned int do_cnl_cstates; unsigned int use_c1_residency_msr; unsigned int has_aperf; unsigned int has_epb; @@ -80,6 +86,8 @@ unsigned int do_rapl; unsigned int do_dts; unsigned int do_ptm; unsigned long long gfx_cur_rc6_ms; +unsigned long long cpuidle_cur_cpu_lpi_us; +unsigned long long cpuidle_cur_sys_lpi_us; unsigned int gfx_cur_mhz; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; @@ -87,6 +95,7 @@ double rapl_power_units, rapl_time_units; double rapl_dram_energy_units, rapl_energy_units; double rapl_joule_counter_range; unsigned int do_core_perf_limit_reasons; +unsigned int has_automatic_cstate_conversion; unsigned int do_gfx_perf_limit_reasons; unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; @@ -147,7 +156,9 @@ char *progname; #define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset; size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; -#define MAX_ADDED_COUNTERS 16 +#define MAX_ADDED_COUNTERS 8 +#define MAX_ADDED_THREAD_COUNTERS 24 +#define BITMASK_SIZE 32 struct thread_data { struct timeval tv_begin; @@ -162,7 +173,7 @@ struct thread_data { unsigned int flags; #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 - unsigned long long counter[MAX_ADDED_COUNTERS]; + unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; } *thread_even, *thread_odd; struct core_data { @@ -183,6 +194,8 @@ struct pkg_data { unsigned long long pc8; unsigned long long pc9; unsigned long long pc10; + unsigned long long cpu_lpi; + unsigned long long sys_lpi; unsigned long long pkg_wtd_core_c0; unsigned long long pkg_any_core_c0; unsigned long long pkg_any_gfxe_c0; @@ -203,12 +216,21 @@ struct pkg_data { #define ODD_COUNTERS thread_odd, core_odd, package_odd #define EVEN_COUNTERS thread_even, core_even, package_even -#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ - (thread_base + (pkg_no) * topo.num_cores_per_pkg * \ - topo.num_threads_per_core + \ - (core_no) * topo.num_threads_per_core + (thread_no)) -#define GET_CORE(core_base, core_no, pkg_no) \ - (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) +#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \ + ((thread_base) + \ + ((pkg_no) * \ + topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \ + ((node_no) * topo.cores_per_node * topo.threads_per_core) + \ + ((core_no) * topo.threads_per_core) + \ + (thread_no)) + +#define GET_CORE(core_base, core_no, node_no, pkg_no) \ + ((core_base) + \ + ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \ + ((node_no) * topo.cores_per_node) + \ + (core_no)) + + #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; @@ -244,14 +266,25 @@ struct system_summary { struct pkg_data packages; } average; +struct cpu_topology { + int physical_package_id; + int logical_cpu_id; + int physical_node_id; + int logical_node_id; /* 0-based count within the package */ + int physical_core_id; + int thread_id; + cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ +} *cpus; struct topo_params { int num_packages; int num_cpus; int num_cores; int max_cpu_num; - int num_cores_per_pkg; - int num_threads_per_core; + int max_node_num; + int nodes_per_pkg; + int cores_per_node; + int threads_per_core; } topo; struct timeval tv_even, tv_odd, tv_delta; @@ -273,27 +306,33 @@ int cpu_is_not_present(int cpu) int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) { - int retval, pkg_no, core_no, thread_no; + int retval, pkg_no, core_no, thread_no, node_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { - for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { - for (thread_no = 0; thread_no < - topo.num_threads_per_core; ++thread_no) { - struct thread_data *t; - struct core_data *c; - struct pkg_data *p; - - t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); - - if (cpu_is_not_present(t->cpu_id)) - continue; - - c = GET_CORE(core_base, core_no, pkg_no); - p = GET_PKG(pkg_base, pkg_no); - - retval = func(t, c, p); - if (retval) - return retval; + for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { + for (node_no = 0; node_no < topo.nodes_per_pkg; + node_no++) { + for (thread_no = 0; thread_no < + topo.threads_per_core; ++thread_no) { + struct thread_data *t; + struct core_data *c; + struct pkg_data *p; + + t = GET_THREAD(thread_base, thread_no, + core_no, node_no, + pkg_no); + + if (cpu_is_not_present(t->cpu_id)) + continue; + + c = GET_CORE(core_base, core_no, + node_no, pkg_no); + p = GET_PKG(pkg_base, pkg_no); + + retval = func(t, c, p); + if (retval) + return retval; + } } } } @@ -346,6 +385,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) * Thus, strings that are proper sub-sets must follow their more specific peers. */ struct msr_counter bic[] = { + { 0x0, "usec" }, + { 0x0, "Time_Of_Day_Seconds" }, { 0x0, "Package" }, { 0x0, "Avg_MHz" }, { 0x0, "Bzy_MHz" }, @@ -369,7 +410,9 @@ struct msr_counter bic[] = { { 0x0, "Pkg%pc7" }, { 0x0, "Pkg%pc8" }, { 0x0, "Pkg%pc9" }, - { 0x0, "Pkg%pc10" }, + { 0x0, "Pk%pc10" }, + { 0x0, "CPU%LPI" }, + { 0x0, "SYS%LPI" }, { 0x0, "PkgWatt" }, { 0x0, "CorWatt" }, { 0x0, "GFXWatt" }, @@ -389,62 +432,72 @@ struct msr_counter bic[] = { { 0x0, "Any%C0" }, { 0x0, "GFX%C0" }, { 0x0, "CPUGFX%" }, + { 0x0, "Node%" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) -#define BIC_Package (1ULL << 0) -#define BIC_Avg_MHz (1ULL << 1) -#define BIC_Bzy_MHz (1ULL << 2) -#define BIC_TSC_MHz (1ULL << 3) -#define BIC_IRQ (1ULL << 4) -#define BIC_SMI (1ULL << 5) -#define BIC_Busy (1ULL << 6) -#define BIC_CPU_c1 (1ULL << 7) -#define BIC_CPU_c3 (1ULL << 8) -#define BIC_CPU_c6 (1ULL << 9) -#define BIC_CPU_c7 (1ULL << 10) -#define BIC_ThreadC (1ULL << 11) -#define BIC_CoreTmp (1ULL << 12) -#define BIC_CoreCnt (1ULL << 13) -#define BIC_PkgTmp (1ULL << 14) -#define BIC_GFX_rc6 (1ULL << 15) -#define BIC_GFXMHz (1ULL << 16) -#define BIC_Pkgpc2 (1ULL << 17) -#define BIC_Pkgpc3 (1ULL << 18) -#define BIC_Pkgpc6 (1ULL << 19) -#define BIC_Pkgpc7 (1ULL << 20) -#define BIC_Pkgpc8 (1ULL << 21) -#define BIC_Pkgpc9 (1ULL << 22) -#define BIC_Pkgpc10 (1ULL << 23) -#define BIC_PkgWatt (1ULL << 24) -#define BIC_CorWatt (1ULL << 25) -#define BIC_GFXWatt (1ULL << 26) -#define BIC_PkgCnt (1ULL << 27) -#define BIC_RAMWatt (1ULL << 28) -#define BIC_PKG__ (1ULL << 29) -#define BIC_RAM__ (1ULL << 30) -#define BIC_Pkg_J (1ULL << 31) -#define BIC_Cor_J (1ULL << 32) -#define BIC_GFX_J (1ULL << 33) -#define BIC_RAM_J (1ULL << 34) -#define BIC_Core (1ULL << 35) -#define BIC_CPU (1ULL << 36) -#define BIC_Mod_c6 (1ULL << 37) -#define BIC_sysfs (1ULL << 38) -#define BIC_Totl_c0 (1ULL << 39) -#define BIC_Any_c0 (1ULL << 40) -#define BIC_GFX_c0 (1ULL << 41) -#define BIC_CPUGFX (1ULL << 42) - -unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; -unsigned long long bic_present = BIC_sysfs; +#define BIC_USEC (1ULL << 0) +#define BIC_TOD (1ULL << 1) +#define BIC_Package (1ULL << 2) +#define BIC_Avg_MHz (1ULL << 3) +#define BIC_Bzy_MHz (1ULL << 4) +#define BIC_TSC_MHz (1ULL << 5) +#define BIC_IRQ (1ULL << 6) +#define BIC_SMI (1ULL << 7) +#define BIC_Busy (1ULL << 8) +#define BIC_CPU_c1 (1ULL << 9) +#define BIC_CPU_c3 (1ULL << 10) +#define BIC_CPU_c6 (1ULL << 11) +#define BIC_CPU_c7 (1ULL << 12) +#define BIC_ThreadC (1ULL << 13) +#define BIC_CoreTmp (1ULL << 14) +#define BIC_CoreCnt (1ULL << 15) +#define BIC_PkgTmp (1ULL << 16) +#define BIC_GFX_rc6 (1ULL << 17) +#define BIC_GFXMHz (1ULL << 18) +#define BIC_Pkgpc2 (1ULL << 19) +#define BIC_Pkgpc3 (1ULL << 20) +#define BIC_Pkgpc6 (1ULL << 21) +#define BIC_Pkgpc7 (1ULL << 22) +#define BIC_Pkgpc8 (1ULL << 23) +#define BIC_Pkgpc9 (1ULL << 24) +#define BIC_Pkgpc10 (1ULL << 25) +#define BIC_CPU_LPI (1ULL << 26) +#define BIC_SYS_LPI (1ULL << 27) +#define BIC_PkgWatt (1ULL << 26) +#define BIC_CorWatt (1ULL << 27) +#define BIC_GFXWatt (1ULL << 28) +#define BIC_PkgCnt (1ULL << 29) +#define BIC_RAMWatt (1ULL << 30) +#define BIC_PKG__ (1ULL << 31) +#define BIC_RAM__ (1ULL << 32) +#define BIC_Pkg_J (1ULL << 33) +#define BIC_Cor_J (1ULL << 34) +#define BIC_GFX_J (1ULL << 35) +#define BIC_RAM_J (1ULL << 36) +#define BIC_Core (1ULL << 37) +#define BIC_CPU (1ULL << 38) +#define BIC_Mod_c6 (1ULL << 39) +#define BIC_sysfs (1ULL << 40) +#define BIC_Totl_c0 (1ULL << 41) +#define BIC_Any_c0 (1ULL << 42) +#define BIC_GFX_c0 (1ULL << 43) +#define BIC_CPUGFX (1ULL << 44) +#define BIC_Node (1ULL << 45) + +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD) + +unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); +unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) +#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) + #define MAX_DEFERRED 16 char *deferred_skip_names[MAX_DEFERRED]; int deferred_skip_index; @@ -469,9 +522,10 @@ void help(void) "--cpu cpu-set limit output to summary plus cpu-set:\n" " {core | package | j,k,l..m,n-p }\n" "--quiet skip decoding system configuration header\n" - "--interval sec Override default 5-second measurement interval\n" + "--interval sec.subsec Override default 5-second measurement interval\n" "--help print this help message\n" "--list list column headers only\n" + "--num_iterations num number of the measurement iterations\n" "--out file create or truncate \"file\" for all output\n" "--version print version information\n" "\n" @@ -496,6 +550,9 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) if (comma) *comma = '\0'; + if (!strcmp(name_list, "all")) + return ~0; + for (i = 0; i < MAX_BIC; ++i) { if (!strcmp(name_list, bic[i].name)) { retval |= (1ULL << i); @@ -532,10 +589,14 @@ void print_header(char *delim) struct msr_counter *mp; int printed = 0; - if (debug) - outp += sprintf(outp, "usec %s", delim); + if (DO_BIC(BIC_USEC)) + outp += sprintf(outp, "%susec", (printed++ ? delim : "")); + if (DO_BIC(BIC_TOD)) + outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); + if (DO_BIC(BIC_Node)) + outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU)) @@ -576,7 +637,7 @@ void print_header(char *delim) if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); @@ -635,6 +696,10 @@ void print_header(char *delim) outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPU_LPI)) + outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); + if (DO_BIC(BIC_SYS_LPI)) + outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); if (do_rapl && !rapl_joules) { if (DO_BIC(BIC_PkgWatt)) @@ -739,6 +804,9 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "pc8: %016llX\n", p->pc8); outp += sprintf(outp, "pc9: %016llX\n", p->pc9); outp += sprintf(outp, "pc10: %016llX\n", p->pc10); + outp += sprintf(outp, "pc10: %016llX\n", p->pc10); + outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); + outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg); outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores); outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx); @@ -786,7 +854,7 @@ int format_counters(struct thread_data *t, struct core_data *c, (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) return 0; - if (debug) { + if (DO_BIC(BIC_USEC)) { /* on each row, print how many usec each timestamp took to gather */ struct timeval tv; @@ -794,6 +862,10 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); } + /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */ + if (DO_BIC(BIC_TOD)) + outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec); + interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; tsc = t->tsc * tsc_tweak; @@ -802,6 +874,8 @@ int format_counters(struct thread_data *t, struct core_data *c, if (t == &average.threads) { if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_Node)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU)) @@ -813,6 +887,15 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } + if (DO_BIC(BIC_Node)) { + if (t) + outp += sprintf(outp, "%s%d", + (printed++ ? delim : ""), + cpus[t->cpu_id].physical_node_id); + else + outp += sprintf(outp, "%s-", + (printed++ ? delim : "")); + } if (DO_BIC(BIC_Core)) { if (c) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); @@ -882,7 +965,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); @@ -959,6 +1042,11 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc); + if (DO_BIC(BIC_CPU_LPI)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float); + if (DO_BIC(BIC_SYS_LPI)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float); + /* * If measurement interval exceeds minimum RAPL Joule Counter range, * indicate that results are suspect by printing "**" in fraction place. @@ -1006,7 +1094,8 @@ int format_counters(struct thread_data *t, struct core_data *c, } done: - outp += sprintf(outp, "\n"); + if (*(outp - 1) != '\n') + outp += sprintf(outp, "\n"); return 0; } @@ -1083,6 +1172,8 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->pc8 = new->pc8 - old->pc8; old->pc9 = new->pc9 - old->pc9; old->pc10 = new->pc10 - old->pc10; + old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; + old->sys_lpi = new->sys_lpi - old->sys_lpi; old->pkg_temp_c = new->pkg_temp_c; /* flag an error when rc6 counter resets/wraps */ @@ -1140,6 +1231,15 @@ delta_thread(struct thread_data *new, struct thread_data *old, int i; struct msr_counter *mp; + /* + * the timestamps from start of measurement interval are in "old" + * the timestamp from end of measurement interval are in "new" + * over-write old w/ new so we can print end of interval values + */ + + old->tv_begin = new->tv_begin; + old->tv_end = new->tv_end; + old->tsc = new->tsc - old->tsc; /* check for TSC < 1 Mcycles over interval */ @@ -1228,6 +1328,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data int i; struct msr_counter *mp; + t->tv_begin.tv_sec = 0; + t->tv_begin.tv_usec = 0; + t->tv_end.tv_sec = 0; + t->tv_end.tv_usec = 0; + t->tsc = 0; t->aperf = 0; t->mperf = 0; @@ -1260,6 +1365,8 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->pc8 = 0; p->pc9 = 0; p->pc10 = 0; + p->cpu_lpi = 0; + p->sys_lpi = 0; p->energy_pkg = 0; p->energy_dram = 0; @@ -1286,6 +1393,13 @@ int sum_counters(struct thread_data *t, struct core_data *c, int i; struct msr_counter *mp; + /* remember first tv_begin */ + if (average.threads.tv_begin.tv_sec == 0) + average.threads.tv_begin = t->tv_begin; + + /* remember last tv_end */ + average.threads.tv_end = t->tv_end; + average.threads.tsc += t->tsc; average.threads.aperf += t->aperf; average.threads.mperf += t->mperf; @@ -1341,6 +1455,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.packages.pc9 += p->pc9; average.packages.pc10 += p->pc10; + average.packages.cpu_lpi = p->cpu_lpi; + average.packages.sys_lpi = p->sys_lpi; + average.packages.energy_pkg += p->energy_pkg; average.packages.energy_dram += p->energy_dram; average.packages.energy_cores += p->energy_cores; @@ -1487,7 +1604,7 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) if (get_msr(cpu, mp->msr_num, counterp)) return -1; } else { - char path[128]; + char path[128 + PATH_BYTES]; if (mp->flags & SYSFS_PERCPU) { sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", @@ -1603,7 +1720,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) { + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } @@ -1684,6 +1801,11 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10)) return -13; + if (DO_BIC(BIC_CPU_LPI)) + p->cpu_lpi = cpuidle_cur_cpu_lpi_us; + if (DO_BIC(BIC_SYS_LPI)) + p->sys_lpi = cpuidle_cur_sys_lpi_us; + if (do_rapl & RAPL_PKG) { if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) return -13; @@ -1769,7 +1891,7 @@ int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; static void @@ -2071,12 +2193,9 @@ dump_nhm_cst_cfg(void) get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); -#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) -#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); - fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", + fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)", (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", @@ -2084,6 +2203,15 @@ dump_nhm_cst_cfg(void) (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); + +#define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) + if (has_automatic_cstate_conversion) { + fprintf(outf, ", automatic c-state conversion=%s", + (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off"); + } + + fprintf(outf, ")\n"); + return; } @@ -2184,6 +2312,8 @@ void free_fd_percpu(void) void free_all_buffers(void) { + int i; + CPU_FREE(cpu_present_set); cpu_present_set = NULL; cpu_present_setsize = 0; @@ -2216,6 +2346,12 @@ void free_all_buffers(void) free(irq_column_2_cpu); free(irqs_per_cpu); + + for (i = 0; i <= topo.max_cpu_num; ++i) { + if (cpus[i].put_ids) + CPU_FREE(cpus[i].put_ids); + } + free(cpus); } @@ -2239,44 +2375,6 @@ int parse_int_file(const char *fmt, ...) return value; } -/* - * get_cpu_position_in_core(cpu) - * return the position of the CPU among its HT siblings in the core - * return -1 if the sibling is not in list - */ -int get_cpu_position_in_core(int cpu) -{ - char path[64]; - FILE *filep; - int this_cpu; - char character; - int i; - - sprintf(path, - "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", - cpu); - filep = fopen(path, "r"); - if (filep == NULL) { - perror(path); - exit(1); - } - - for (i = 0; i < topo.num_threads_per_core; i++) { - fscanf(filep, "%d", &this_cpu); - if (this_cpu == cpu) { - fclose(filep); - return i; - } - - /* Account for no separator after last thread*/ - if (i != (topo.num_threads_per_core - 1)) - fscanf(filep, "%c", &character); - } - - fclose(filep); - return -1; -} - /* * cpu_is_first_core_in_package(cpu) * return 1 if given CPU is 1st core in package @@ -2296,35 +2394,115 @@ int get_core_id(int cpu) return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); } -int get_num_ht_siblings(int cpu) +void set_node_data(void) { char path[80]; FILE *filep; - int sib1; - int matches = 0; - char character; - char str[100]; - char *ch; + int pkg, node, cpu; - sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); - filep = fopen_or_die(path, "r"); + struct pkg_node_info { + int count; + int min; + } *pni; - /* - * file format: - * A ',' separated or '-' separated set of numbers - * (eg 1-2 or 1,3,4,5) - */ - fscanf(filep, "%d%c\n", &sib1, &character); - fseek(filep, 0, SEEK_SET); - fgets(str, 100, filep); - ch = strchr(str, character); - while (ch != NULL) { - matches++; - ch = strchr(ch+1, character); + pni = calloc(topo.num_packages, sizeof(struct pkg_node_info)); + if (!pni) + err(1, "calloc pkg_node_count"); + + for (pkg = 0; pkg < topo.num_packages; pkg++) + pni[pkg].min = topo.num_cpus; + + for (node = 0; node <= topo.max_node_num; node++) { + /* find the "first" cpu in the node */ + sprintf(path, "/sys/bus/node/devices/node%d/cpulist", node); + filep = fopen(path, "r"); + if (!filep) + continue; + fscanf(filep, "%d", &cpu); + fclose(filep); + + pkg = cpus[cpu].physical_package_id; + pni[pkg].count++; + + if (node < pni[pkg].min) + pni[pkg].min = node; } + for (pkg = 0; pkg < topo.num_packages; pkg++) + if (pni[pkg].count > topo.nodes_per_pkg) + topo.nodes_per_pkg = pni[0].count; + + for (cpu = 0; cpu < topo.num_cpus; cpu++) { + pkg = cpus[cpu].physical_package_id; + node = cpus[cpu].physical_node_id; + cpus[cpu].logical_node_id = node - pni[pkg].min; + } + free(pni); + +} + +int get_physical_node_id(struct cpu_topology *thiscpu) +{ + char path[80]; + FILE *filep; + int i; + int cpu = thiscpu->logical_cpu_id; + + for (i = 0; i <= topo.max_cpu_num; i++) { + sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", + cpu, i); + filep = fopen(path, "r"); + if (!filep) + continue; + fclose(filep); + return i; + } + return -1; +} + +int get_thread_siblings(struct cpu_topology *thiscpu) +{ + char path[80], character; + FILE *filep; + unsigned long map; + int so, shift, sib_core; + int cpu = thiscpu->logical_cpu_id; + int offset = topo.max_cpu_num + 1; + size_t size; + int thread_id = 0; + + thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); + if (thiscpu->thread_id < 0) + thiscpu->thread_id = thread_id++; + if (!thiscpu->put_ids) + return -1; + + size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(size, thiscpu->put_ids); + + sprintf(path, + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); + filep = fopen_or_die(path, "r"); + do { + offset -= BITMASK_SIZE; + fscanf(filep, "%lx%c", &map, &character); + for (shift = 0; shift < BITMASK_SIZE; shift++) { + if ((map >> shift) & 0x1) { + so = shift + offset; + sib_core = get_core_id(so); + if (sib_core == thiscpu->physical_core_id) { + CPU_SET_S(so, size, thiscpu->put_ids); + if ((so != cpu) && + (cpus[so].thread_id < 0)) + cpus[so].thread_id = + thread_id++; + } + } + } + } while (!strncmp(&character, ",", 1)); fclose(filep); - return matches+1; + + return CPU_COUNT_S(size, thiscpu->put_ids); } /* @@ -2339,32 +2517,42 @@ int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2) { - int retval, pkg_no, core_no, thread_no; + int retval, pkg_no, node_no, core_no, thread_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { - for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { - for (thread_no = 0; thread_no < - topo.num_threads_per_core; ++thread_no) { - struct thread_data *t, *t2; - struct core_data *c, *c2; - struct pkg_data *p, *p2; - - t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); - - if (cpu_is_not_present(t->cpu_id)) - continue; - - t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); - - c = GET_CORE(core_base, core_no, pkg_no); - c2 = GET_CORE(core_base2, core_no, pkg_no); - - p = GET_PKG(pkg_base, pkg_no); - p2 = GET_PKG(pkg_base2, pkg_no); - - retval = func(t, c, p, t2, c2, p2); - if (retval) - return retval; + for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { + for (core_no = 0; core_no < topo.cores_per_node; + ++core_no) { + for (thread_no = 0; thread_no < + topo.threads_per_core; ++thread_no) { + struct thread_data *t, *t2; + struct core_data *c, *c2; + struct pkg_data *p, *p2; + + t = GET_THREAD(thread_base, thread_no, + core_no, node_no, + pkg_no); + + if (cpu_is_not_present(t->cpu_id)) + continue; + + t2 = GET_THREAD(thread_base2, thread_no, + core_no, node_no, + pkg_no); + + c = GET_CORE(core_base, core_no, + node_no, pkg_no); + c2 = GET_CORE(core_base2, core_no, + node_no, + pkg_no); + + p = GET_PKG(pkg_base, pkg_no); + p2 = GET_PKG(pkg_base2, pkg_no); + + retval = func(t, c, p, t2, c2, p2); + if (retval) + return retval; + } } } } @@ -2409,6 +2597,20 @@ void re_initialize(void) printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); } +void set_max_cpu_num(void) +{ + FILE *filep; + unsigned long dummy; + + topo.max_cpu_num = 0; + filep = fopen_or_die( + "/sys/devices/system/cpu/cpu0/topology/thread_siblings", + "r"); + while (fscanf(filep, "%lx,", &dummy) == 1) + topo.max_cpu_num += BITMASK_SIZE; + fclose(filep); + topo.max_cpu_num--; /* 0 based */ +} /* * count_cpus() @@ -2416,10 +2618,7 @@ void re_initialize(void) */ int count_cpus(int cpu) { - if (topo.max_cpu_num < cpu) - topo.max_cpu_num = cpu; - - topo.num_cpus += 1; + topo.num_cpus++; return 0; } int mark_cpu_present(int cpu) @@ -2428,6 +2627,12 @@ int mark_cpu_present(int cpu) return 0; } +int init_thread_id(int cpu) +{ + cpus[cpu].thread_id = -1; + return 0; +} + /* * snapshot_proc_interrupts() * @@ -2541,6 +2746,52 @@ int snapshot_gfx_mhz(void) return 0; } +/* + * snapshot_cpu_lpi() + * + * record snapshot of + * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_cpu_lpi_us(void) +{ + FILE *fp; + int retval; + + fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); + + retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); + if (retval != 1) + err(1, "CPU LPI"); + + fclose(fp); + + return 0; +} +/* + * snapshot_sys_lpi() + * + * record snapshot of + * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_sys_lpi_us(void) +{ + FILE *fp; + int retval; + + fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r"); + + retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); + if (retval != 1) + err(1, "SYS LPI"); + + fclose(fp); + + return 0; +} /* * snapshot /proc and /sys files * @@ -2558,13 +2809,83 @@ int snapshot_proc_sysfs_files(void) if (DO_BIC(BIC_GFXMHz)) snapshot_gfx_mhz(); + if (DO_BIC(BIC_CPU_LPI)) + snapshot_cpu_lpi_us(); + + if (DO_BIC(BIC_SYS_LPI)) + snapshot_sys_lpi_us(); + return 0; } +int exit_requested; + +static void signal_handler (int signal) +{ + switch (signal) { + case SIGINT: + exit_requested = 1; + if (debug) + fprintf(stderr, " SIGINT\n"); + break; + case SIGUSR1: + if (debug > 1) + fprintf(stderr, "SIGUSR1\n"); + break; + } + /* make sure this manually-invoked interval is at least 1ms long */ + nanosleep(&one_msec, NULL); +} + +void setup_signal_handler(void) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + + sa.sa_handler = &signal_handler; + + if (sigaction(SIGINT, &sa, NULL) < 0) + err(1, "sigaction SIGINT"); + if (sigaction(SIGUSR1, &sa, NULL) < 0) + err(1, "sigaction SIGUSR1"); +} + +void do_sleep(void) +{ + struct timeval select_timeout; + fd_set readfds; + int retval; + + FD_ZERO(&readfds); + FD_SET(0, &readfds); + + if (!isatty(fileno(stdin))) { + nanosleep(&interval_ts, NULL); + return; + } + + select_timeout = interval_tv; + retval = select(1, &readfds, NULL, NULL, &select_timeout); + + if (retval == 1) { + switch (getc(stdin)) { + case 'q': + exit_requested = 1; + break; + } + /* make sure this manually-invoked interval is at least 1ms long */ + nanosleep(&one_msec, NULL); + } +} + void turbostat_loop() { int retval; int restarted = 0; + int done_iters = 0; + + setup_signal_handler(); restart: restarted++; @@ -2581,6 +2902,7 @@ void turbostat_loop() goto restart; } restarted = 0; + done_iters = 0; gettimeofday(&tv_even, (struct timezone *)NULL); while (1) { @@ -2588,7 +2910,7 @@ void turbostat_loop() re_initialize(); goto restart; } - nanosleep(&interval_ts, NULL); + do_sleep(); if (snapshot_proc_sysfs_files()) goto restart; retval = for_all_cpus(get_counters, ODD_COUNTERS); @@ -2607,7 +2929,11 @@ void turbostat_loop() compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); flush_output_stdout(); - nanosleep(&interval_ts, NULL); + if (exit_requested) + break; + if (num_iterations && ++done_iters >= num_iterations) + break; + do_sleep(); if (snapshot_proc_sysfs_files()) goto restart; retval = for_all_cpus(get_counters, EVEN_COUNTERS); @@ -2626,6 +2952,10 @@ void turbostat_loop() compute_average(ODD_COUNTERS); format_all_counters(ODD_COUNTERS); flush_output_stdout(); + if (exit_requested) + break; + if (num_iterations && ++done_iters >= num_iterations) + break; } } @@ -2740,6 +3070,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ pkg_cstate_limits = hsw_pkg_cstate_limits; has_misc_feature_control = 1; break; @@ -2945,6 +3276,7 @@ int has_config_tdp(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ @@ -3399,6 +3731,7 @@ void rapl_probe(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; BIC_PRESENT(BIC_PKG__); BIC_PRESENT(BIC_RAM__); @@ -3523,6 +3856,12 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model) } } +void automatic_cstate_conversion_probe(unsigned int family, unsigned int model) +{ + if (is_skx(family, model) || is_bdx(family, model)) + has_automatic_cstate_conversion = 1; +} + int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned long long msr; @@ -3728,6 +4067,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ case INTEL_FAM6_ATOM_GEMINI_LAKE: @@ -3761,6 +4101,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ case INTEL_FAM6_ATOM_GEMINI_LAKE: return 1; @@ -3786,6 +4127,7 @@ int has_skl_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ return 1; } return 0; @@ -3815,6 +4157,19 @@ int is_knl(unsigned int family, unsigned int model) return 0; } +int is_cnl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ + return 1; + } + + return 0; +} + unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model) { if (is_knl(family, model)) @@ -3947,7 +4302,7 @@ void decode_misc_enable_msr(void) base_cpu, msr, msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", - msr & MSR_IA32_MISC_ENABLE_MWAIT ? "No-" : "", + msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); } @@ -4152,7 +4507,6 @@ void process_cpuid() case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ crystal_hz = 24000000; /* 24.0 MHz */ break; - case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ crystal_hz = 25000000; /* 25.0 MHz */ break; @@ -4253,6 +4607,7 @@ void process_cpuid() } do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); + do_cnl_cstates = is_cnl(family, model); if (!quiet) decode_misc_pwr_mgmt_msr(); @@ -4262,6 +4617,7 @@ void process_cpuid() rapl_probe(family, model); perf_limit_reasons_probe(family, model); + automatic_cstate_conversion_probe(family, model); if (!quiet) dump_cstate_pstate_config_info(family, model); @@ -4280,6 +4636,16 @@ void process_cpuid() if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) BIC_PRESENT(BIC_GFXMHz); + if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) + BIC_PRESENT(BIC_CPU_LPI); + else + BIC_NOT_PRESENT(BIC_CPU_LPI); + + if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK)) + BIC_PRESENT(BIC_SYS_LPI); + else + BIC_NOT_PRESENT(BIC_SYS_LPI); + if (!quiet) decode_misc_feature_control(); @@ -4310,14 +4676,10 @@ void topology_probe() int max_core_id = 0; int max_package_id = 0; int max_siblings = 0; - struct cpu_topology { - int core_id; - int physical_package_id; - } *cpus; /* Initialize num_cpus, max_cpu_num */ + set_max_cpu_num(); topo.num_cpus = 0; - topo.max_cpu_num = 0; for_all_proc_cpus(count_cpus); if (!summary_only && topo.num_cpus > 1) BIC_PRESENT(BIC_CPU); @@ -4357,6 +4719,7 @@ void topology_probe() cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); + for_all_proc_cpus(init_thread_id); /* * For online cpus @@ -4370,26 +4733,45 @@ void topology_probe() fprintf(outf, "cpu%d NOT PRESENT\n", i); continue; } - cpus[i].core_id = get_core_id(i); - if (cpus[i].core_id > max_core_id) - max_core_id = cpus[i].core_id; + cpus[i].logical_cpu_id = i; + + /* get package information */ cpus[i].physical_package_id = get_physical_package_id(i); if (cpus[i].physical_package_id > max_package_id) max_package_id = cpus[i].physical_package_id; - siblings = get_num_ht_siblings(i); + /* get numa node information */ + cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); + if (cpus[i].physical_node_id > topo.max_node_num) + topo.max_node_num = cpus[i].physical_node_id; + + /* get core information */ + cpus[i].physical_core_id = get_core_id(i); + if (cpus[i].physical_core_id > max_core_id) + max_core_id = cpus[i].physical_core_id; + + /* get thread information */ + siblings = get_thread_siblings(&cpus[i]); if (siblings > max_siblings) max_siblings = siblings; + if (cpus[i].thread_id != -1) + topo.num_cores++; + if (debug > 1) - fprintf(outf, "cpu %d pkg %d core %d\n", - i, cpus[i].physical_package_id, cpus[i].core_id); + fprintf(outf, + "cpu %d pkg %d node %d core %d thread %d\n", + i, cpus[i].physical_package_id, + cpus[i].physical_node_id, + cpus[i].physical_core_id, + cpus[i].thread_id); } - topo.num_cores_per_pkg = max_core_id + 1; + + topo.cores_per_node = max_core_id + 1; if (debug > 1) fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", - max_core_id, topo.num_cores_per_pkg); - if (!summary_only && topo.num_cores_per_pkg > 1) + max_core_id, topo.cores_per_node); + if (!summary_only && topo.cores_per_node > 1) BIC_PRESENT(BIC_Core); topo.num_packages = max_package_id + 1; @@ -4399,33 +4781,38 @@ void topology_probe() if (!summary_only && topo.num_packages > 1) BIC_PRESENT(BIC_Package); - topo.num_threads_per_core = max_siblings; + set_node_data(); if (debug > 1) - fprintf(outf, "max_siblings %d\n", max_siblings); + fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg); + if (!summary_only && topo.nodes_per_pkg > 1) + BIC_PRESENT(BIC_Node); - free(cpus); + topo.threads_per_core = max_siblings; + if (debug > 1) + fprintf(outf, "max_siblings %d\n", max_siblings); } void -allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) +allocate_counters(struct thread_data **t, struct core_data **c, + struct pkg_data **p) { int i; + int num_cores = topo.cores_per_node * topo.nodes_per_pkg * + topo.num_packages; + int num_threads = topo.threads_per_core * num_cores; - *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * - topo.num_packages, sizeof(struct thread_data)); + *t = calloc(num_threads, sizeof(struct thread_data)); if (*t == NULL) goto error; - for (i = 0; i < topo.num_threads_per_core * - topo.num_cores_per_pkg * topo.num_packages; i++) + for (i = 0; i < num_threads; i++) (*t)[i].cpu_id = -1; - *c = calloc(topo.num_cores_per_pkg * topo.num_packages, - sizeof(struct core_data)); + *c = calloc(num_cores, sizeof(struct core_data)); if (*c == NULL) goto error; - for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) + for (i = 0; i < num_cores; i++) (*c)[i].core_id = -1; *p = calloc(topo.num_packages, sizeof(struct pkg_data)); @@ -4442,47 +4829,39 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data /* * init_counter() * - * set cpu_id, core_num, pkg_num * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE - * - * increment topo.num_cores when 1st core in pkg seen */ void init_counter(struct thread_data *thread_base, struct core_data *core_base, - struct pkg_data *pkg_base, int thread_num, int core_num, - int pkg_num, int cpu_id) + struct pkg_data *pkg_base, int cpu_id) { + int pkg_id = cpus[cpu_id].physical_package_id; + int node_id = cpus[cpu_id].logical_node_id; + int core_id = cpus[cpu_id].physical_core_id; + int thread_id = cpus[cpu_id].thread_id; struct thread_data *t; struct core_data *c; struct pkg_data *p; - t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); - c = GET_CORE(core_base, core_num, pkg_num); - p = GET_PKG(pkg_base, pkg_num); + t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); + c = GET_CORE(core_base, core_id, node_id, pkg_id); + p = GET_PKG(pkg_base, pkg_id); t->cpu_id = cpu_id; - if (thread_num == 0) { + if (thread_id == 0) { t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; if (cpu_is_first_core_in_package(cpu_id)) t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; } - c->core_id = core_num; - p->package_id = pkg_num; + c->core_id = core_id; + p->package_id = pkg_id; } int initialize_counters(int cpu_id) { - int my_thread_id, my_core_id, my_package_id; - - my_package_id = get_physical_package_id(cpu_id); - my_core_id = get_core_id(cpu_id); - my_thread_id = get_cpu_position_in_core(cpu_id); - if (!my_thread_id) - topo.num_cores++; - - init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); - init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); + init_counter(EVEN_COUNTERS, cpu_id); + init_counter(ODD_COUNTERS, cpu_id); return 0; } @@ -4630,7 +5009,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 17.06.23" + fprintf(outf, "turbostat version 18.06.01" " - Len Brown \n"); } @@ -4661,7 +5040,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp->next = sys.tp; sys.tp = msrp; sys.added_thread_counters++; - if (sys.added_thread_counters > MAX_ADDED_COUNTERS) { + if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) { fprintf(stderr, "exceeded max %d added thread counters\n", MAX_ADDED_COUNTERS); exit(-1); @@ -4820,7 +5199,7 @@ void probe_sysfs(void) if (!DO_BIC(BIC_sysfs)) return; - for (state = 10; state > 0; --state) { + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -4847,7 +5226,7 @@ void probe_sysfs(void) FORMAT_PERCENT, SYSFS_PERCPU); } - for (state = 10; state > 0; --state) { + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -4960,34 +5339,6 @@ void parse_cpu_command(char *optarg) exit(-1); } -int shown; -/* - * parse_show_hide() - process cmdline to set default counter action - */ -void parse_show_hide(char *optarg, enum show_hide_mode new_mode) -{ - /* - * --show: show only those specified - * The 1st invocation will clear and replace the enabled mask - * subsequent invocations can add to it. - */ - if (new_mode == SHOW_LIST) { - if (shown == 0) - bic_enabled = bic_lookup(optarg, new_mode); - else - bic_enabled |= bic_lookup(optarg, new_mode); - shown = 1; - - return; - } - - /* - * --hide: do not show those specified - * multiple invocations simply clear more bits in enabled mask - */ - bic_enabled &= ~bic_lookup(optarg, new_mode); - -} void cmdline(int argc, char **argv) { @@ -4998,7 +5349,9 @@ void cmdline(int argc, char **argv) {"cpu", required_argument, 0, 'c'}, {"Dump", no_argument, 0, 'D'}, {"debug", no_argument, 0, 'd'}, /* internal, not documented */ + {"enable", required_argument, 0, 'e'}, {"interval", required_argument, 0, 'i'}, + {"num_iterations", required_argument, 0, 'n'}, {"help", no_argument, 0, 'h'}, {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help {"Joules", no_argument, 0, 'J'}, @@ -5014,7 +5367,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -5026,11 +5379,20 @@ void cmdline(int argc, char **argv) case 'D': dump_only++; break; + case 'e': + /* --enable specified counter */ + bic_enabled |= bic_lookup(optarg, SHOW_LIST); + break; case 'd': debug++; + ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); break; case 'H': - parse_show_hide(optarg, HIDE_LIST); + /* + * --hide: do not show those specified + * multiple invocations simply clear more bits in enabled mask + */ + bic_enabled &= ~bic_lookup(optarg, HIDE_LIST); break; case 'h': default: @@ -5046,7 +5408,8 @@ void cmdline(int argc, char **argv) exit(2); } - interval_ts.tv_sec = interval; + interval_tv.tv_sec = interval_ts.tv_sec = interval; + interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000; interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; } break; @@ -5054,6 +5417,7 @@ void cmdline(int argc, char **argv) rapl_joules++; break; case 'l': + ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); list_header_only++; quiet++; break; @@ -5063,8 +5427,26 @@ void cmdline(int argc, char **argv) case 'q': quiet = 1; break; + case 'n': + num_iterations = strtod(optarg, NULL); + + if (num_iterations <= 0) { + fprintf(outf, "iterations %d should be positive number\n", + num_iterations); + exit(2); + } + break; case 's': - parse_show_hide(optarg, SHOW_LIST); + /* + * --show: show only those specified + * The 1st invocation will clear and replace the enabled mask + * subsequent invocations can add to it. + */ + if (shown == 0) + bic_enabled = bic_lookup(optarg, SHOW_LIST); + else + bic_enabled |= bic_lookup(optarg, SHOW_LIST); + shown = 1; break; case 'S': summary_only++; diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile index 2447b1bbaacff8..f4534fb8b95158 100644 --- a/tools/power/x86/x86_energy_perf_policy/Makefile +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -24,5 +24,5 @@ install : x86_energy_perf_policy install -d $(DESTDIR)$(PREFIX)/bin install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy install -d $(DESTDIR)$(PREFIX)/share/man/man8 - install x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8 + install -m 644 x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8
', '