diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index 2f813d644c696d..18b7dc929234f6 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -99,7 +99,7 @@ Description: this file, the suspend image will be as small as possible. Reading from this file will display the current image size - limit, which is set to 500 MB by default. + limit, which is set to around 2/5 of available RAM by default. What: /sys/power/pm_trace Date: August 2006 diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 8f1d3de449b53f..ac6f5c597a5612 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -465,6 +465,13 @@ Next, the following policy attributes have special meaning if policy for the time interval between the last two invocations of the driver's utilization update callback by the CPU scheduler for that CPU. +One more policy attribute is present if the `HWP feature is enabled in the +processor `_: + +``base_frequency`` + Shows the base frequency of the CPU. Any frequency above this will be + in the turbo frequency range. + The meaning of these attributes in the `passive mode `_ is the same as for other scaling drivers. diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index cc87adf44c0a0a..236d1fb1364027 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -56,7 +56,7 @@ If you want to limit the suspend image size to N bytes, do echo N > /sys/power/image_size -before suspend (it is limited to 500 MB by default). +before suspend (it is limited to around 2/5 of available RAM by default). . The resume process checks for the presence of the resume device, if found, it then checks the contents for the hibernation image signature. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1a0be022f91d8d..e8de5de1057f62 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2422,7 +2422,7 @@ menu "Power management and ACPI options" config ARCH_HIBERNATION_HEADER def_bool y - depends on X86_64 && HIBERNATION + depends on HIBERNATION source "kernel/power/Kconfig" diff --git a/arch/x86/include/asm/suspend.h b/arch/x86/include/asm/suspend.h index ecffe81ff65cd9..a892494ca5e441 100644 --- a/arch/x86/include/asm/suspend.h +++ b/arch/x86/include/asm/suspend.h @@ -4,3 +4,11 @@ #else # include #endif +extern unsigned long restore_jump_address __visible; +extern unsigned long jump_address_phys; +extern unsigned long restore_cr3 __visible; +extern unsigned long temp_pgt __visible; +extern unsigned long relocated_restore_code __visible; +extern int relocate_restore_code(void); +/* Defined in hibernate_asm_32/64.S */ +extern asmlinkage __visible int restore_image(void); diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 8be6afb584715d..fdbd9d7b7bca13 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -32,4 +32,8 @@ struct saved_context { unsigned long return_address; } __attribute__((packed)); +/* routines for saving/restoring kernel state */ +extern char core_restore_code[]; +extern char restore_registers[]; + #endif /* _ASM_X86_SUSPEND_32_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b4866badb235a6..90ecc108bc8a52 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1251,7 +1251,7 @@ void __init setup_arch(char **cmdline_p) x86_init.hyper.guest_late_init(); e820__reserve_resources(); - e820__register_nosave_regions(max_low_pfn); + e820__register_nosave_regions(max_pfn); x86_init.resources.reserve_resources(); diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile index a4701389562ca7..37923d715741ae 100644 --- a/arch/x86/power/Makefile +++ b/arch/x86/power/Makefile @@ -7,4 +7,4 @@ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_cpu.o := $(nostackp) obj-$(CONFIG_PM_SLEEP) += cpu.o -obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o +obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o hibernate.o diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c new file mode 100644 index 00000000000000..bcddf09b5aa391 --- /dev/null +++ b/arch/x86/power/hibernate.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hibernation support for x86 + * + * Copyright (c) 2007 Rafael J. Wysocki + * Copyright (c) 2002 Pavel Machek + * Copyright (c) 2001 Patrick Mochel + */ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Address to jump to in the last phase of restore in order to get to the image + * kernel's text (this value is passed in the image header). + */ +unsigned long restore_jump_address __visible; +unsigned long jump_address_phys; + +/* + * Value of the cr3 register from before the hibernation (this value is passed + * in the image header). + */ +unsigned long restore_cr3 __visible; +unsigned long temp_pgt __visible; +unsigned long relocated_restore_code __visible; + +/** + * pfn_is_nosave - check if given pfn is in the 'nosave' section + */ +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn; + unsigned long nosave_end_pfn; + + nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT; + nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; + + return pfn >= nosave_begin_pfn && pfn < nosave_end_pfn; +} + + +#define MD5_DIGEST_SIZE 16 + +struct restore_data_record { + unsigned long jump_address; + unsigned long jump_address_phys; + unsigned long cr3; + unsigned long magic; + u8 e820_digest[MD5_DIGEST_SIZE]; +}; + +#if IS_BUILTIN(CONFIG_CRYPTO_MD5) +/** + * get_e820_md5 - calculate md5 according to given e820 table + * + * @table: the e820 table to be calculated + * @buf: the md5 result to be stored to + */ +static int get_e820_md5(struct e820_table *table, void *buf) +{ + struct crypto_shash *tfm; + struct shash_desc *desc; + int size; + int ret = 0; + + tfm = crypto_alloc_shash("md5", 0, 0); + if (IS_ERR(tfm)) + return -ENOMEM; + + desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm), + GFP_KERNEL); + if (!desc) { + ret = -ENOMEM; + goto free_tfm; + } + + desc->tfm = tfm; + desc->flags = 0; + + size = offsetof(struct e820_table, entries) + + sizeof(struct e820_entry) * table->nr_entries; + + if (crypto_shash_digest(desc, (u8 *)table, size, buf)) + ret = -EINVAL; + + kzfree(desc); + +free_tfm: + crypto_free_shash(tfm); + return ret; +} + +static int hibernation_e820_save(void *buf) +{ + return get_e820_md5(e820_table_firmware, buf); +} + +static bool hibernation_e820_mismatch(void *buf) +{ + int ret; + u8 result[MD5_DIGEST_SIZE]; + + memset(result, 0, MD5_DIGEST_SIZE); + /* If there is no digest in suspend kernel, let it go. */ + if (!memcmp(result, buf, MD5_DIGEST_SIZE)) + return false; + + ret = get_e820_md5(e820_table_firmware, result); + if (ret) + return true; + + return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false; +} +#else +static int hibernation_e820_save(void *buf) +{ + return 0; +} + +static bool hibernation_e820_mismatch(void *buf) +{ + /* If md5 is not builtin for restore kernel, let it go. */ + return false; +} +#endif + +#ifdef CONFIG_X86_64 +#define RESTORE_MAGIC 0x23456789ABCDEF01UL +#else +#define RESTORE_MAGIC 0x12345678UL +#endif + +/** + * arch_hibernation_header_save - populate the architecture specific part + * of a hibernation image header + * @addr: address to save the data at + */ +int arch_hibernation_header_save(void *addr, unsigned int max_size) +{ + struct restore_data_record *rdr = addr; + + if (max_size < sizeof(struct restore_data_record)) + return -EOVERFLOW; + rdr->magic = RESTORE_MAGIC; + rdr->jump_address = (unsigned long)restore_registers; + rdr->jump_address_phys = __pa_symbol(restore_registers); + + /* + * The restore code fixes up CR3 and CR4 in the following sequence: + * + * [in hibernation asm] + * 1. CR3 <= temporary page tables + * 2. CR4 <= mmu_cr4_features (from the kernel that restores us) + * 3. CR3 <= rdr->cr3 + * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel) + * [in restore_processor_state()] + * 5. CR4 <= saved CR4 + * 6. CR3 <= saved CR3 + * + * Our mmu_cr4_features has CR4.PCIDE=0, and toggling + * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so + * rdr->cr3 needs to point to valid page tables but must not + * have any of the PCID bits set. + */ + rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK; + + return hibernation_e820_save(rdr->e820_digest); +} + +/** + * arch_hibernation_header_restore - read the architecture specific data + * from the hibernation image header + * @addr: address to read the data from + */ +int arch_hibernation_header_restore(void *addr) +{ + struct restore_data_record *rdr = addr; + + if (rdr->magic != RESTORE_MAGIC) { + pr_crit("Unrecognized hibernate image header format!\n"); + return -EINVAL; + } + + restore_jump_address = rdr->jump_address; + jump_address_phys = rdr->jump_address_phys; + restore_cr3 = rdr->cr3; + + if (hibernation_e820_mismatch(rdr->e820_digest)) { + pr_crit("Hibernate inconsistent memory map detected!\n"); + return -ENODEV; + } + + return 0; +} + +int relocate_restore_code(void) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + relocated_restore_code = get_safe_page(GFP_ATOMIC); + if (!relocated_restore_code) + return -ENOMEM; + + memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE); + + /* Make the page containing the relocated code executable */ + pgd = (pgd_t *)__va(read_cr3_pa()) + + pgd_index(relocated_restore_code); + p4d = p4d_offset(pgd, relocated_restore_code); + if (p4d_large(*p4d)) { + set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX)); + goto out; + } + pud = pud_offset(p4d, relocated_restore_code); + if (pud_large(*pud)) { + set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); + goto out; + } + pmd = pmd_offset(pud, relocated_restore_code); + if (pmd_large(*pmd)) { + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX)); + goto out; + } + pte = pte_offset_kernel(pmd, relocated_restore_code); + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX)); +out: + __flush_tlb_all(); + return 0; +} diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index afc4ed7b157826..15695e30f982e6 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -14,9 +14,7 @@ #include #include #include - -/* Defined in hibernate_asm_32.S */ -extern int restore_image(void); +#include /* Pointer to the temporary resume page tables */ pgd_t *resume_pg_dir; @@ -145,6 +143,32 @@ static inline void resume_init_first_level_page_table(pgd_t *pg_dir) #endif } +static int set_up_temporary_text_mapping(pgd_t *pgd_base) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_base + pgd_index(restore_jump_address); + + pmd = resume_one_md_table_init(pgd); + if (!pmd) + return -ENOMEM; + + if (boot_cpu_has(X86_FEATURE_PSE)) { + set_pmd(pmd + pmd_index(restore_jump_address), + __pmd((jump_address_phys & PMD_MASK) | pgprot_val(PAGE_KERNEL_LARGE_EXEC))); + } else { + pte = resume_one_page_table_init(pmd); + if (!pte) + return -ENOMEM; + set_pte(pte + pte_index(restore_jump_address), + __pte((jump_address_phys & PAGE_MASK) | pgprot_val(PAGE_KERNEL_EXEC))); + } + + return 0; +} + asmlinkage int swsusp_arch_resume(void) { int error; @@ -154,22 +178,22 @@ asmlinkage int swsusp_arch_resume(void) return -ENOMEM; resume_init_first_level_page_table(resume_pg_dir); + + error = set_up_temporary_text_mapping(resume_pg_dir); + if (error) + return error; + error = resume_physical_mapping_init(resume_pg_dir); if (error) return error; + temp_pgt = __pa(resume_pg_dir); + + error = relocate_restore_code(); + if (error) + return error; + /* We have got enough memory and from now on we cannot recover */ restore_image(); return 0; } - -/* - * pfn_is_nosave - check if given pfn is in the 'nosave' section - */ - -int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; - return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); -} diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index f8e3b668d20b9b..239f424ccb29bf 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -26,26 +26,6 @@ #include #include -/* Defined in hibernate_asm_64.S */ -extern asmlinkage __visible int restore_image(void); - -/* - * Address to jump to in the last phase of restore in order to get to the image - * kernel's text (this value is passed in the image header). - */ -unsigned long restore_jump_address __visible; -unsigned long jump_address_phys; - -/* - * Value of the cr3 register from before the hibernation (this value is passed - * in the image header). - */ -unsigned long restore_cr3 __visible; - -unsigned long temp_level4_pgt __visible; - -unsigned long relocated_restore_code __visible; - static int set_up_temporary_text_mapping(pgd_t *pgd) { pmd_t *pmd; @@ -141,46 +121,7 @@ static int set_up_temporary_mappings(void) return result; } - temp_level4_pgt = __pa(pgd); - return 0; -} - -static int relocate_restore_code(void) -{ - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - relocated_restore_code = get_safe_page(GFP_ATOMIC); - if (!relocated_restore_code) - return -ENOMEM; - - memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE); - - /* Make the page containing the relocated code executable */ - pgd = (pgd_t *)__va(read_cr3_pa()) + - pgd_index(relocated_restore_code); - p4d = p4d_offset(pgd, relocated_restore_code); - if (p4d_large(*p4d)) { - set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX)); - goto out; - } - pud = pud_offset(p4d, relocated_restore_code); - if (pud_large(*pud)) { - set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); - goto out; - } - pmd = pmd_offset(pud, relocated_restore_code); - if (pmd_large(*pmd)) { - set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX)); - goto out; - } - pte = pte_offset_kernel(pmd, relocated_restore_code); - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX)); -out: - __flush_tlb_all(); + temp_pgt = __pa(pgd); return 0; } @@ -200,166 +141,3 @@ asmlinkage int swsusp_arch_resume(void) restore_image(); return 0; } - -/* - * pfn_is_nosave - check if given pfn is in the 'nosave' section - */ - -int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; - return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); -} - -#define MD5_DIGEST_SIZE 16 - -struct restore_data_record { - unsigned long jump_address; - unsigned long jump_address_phys; - unsigned long cr3; - unsigned long magic; - u8 e820_digest[MD5_DIGEST_SIZE]; -}; - -#define RESTORE_MAGIC 0x23456789ABCDEF01UL - -#if IS_BUILTIN(CONFIG_CRYPTO_MD5) -/** - * get_e820_md5 - calculate md5 according to given e820 table - * - * @table: the e820 table to be calculated - * @buf: the md5 result to be stored to - */ -static int get_e820_md5(struct e820_table *table, void *buf) -{ - struct crypto_shash *tfm; - struct shash_desc *desc; - int size; - int ret = 0; - - tfm = crypto_alloc_shash("md5", 0, 0); - if (IS_ERR(tfm)) - return -ENOMEM; - - desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm), - GFP_KERNEL); - if (!desc) { - ret = -ENOMEM; - goto free_tfm; - } - - desc->tfm = tfm; - desc->flags = 0; - - size = offsetof(struct e820_table, entries) + - sizeof(struct e820_entry) * table->nr_entries; - - if (crypto_shash_digest(desc, (u8 *)table, size, buf)) - ret = -EINVAL; - - kzfree(desc); - -free_tfm: - crypto_free_shash(tfm); - return ret; -} - -static void hibernation_e820_save(void *buf) -{ - get_e820_md5(e820_table_firmware, buf); -} - -static bool hibernation_e820_mismatch(void *buf) -{ - int ret; - u8 result[MD5_DIGEST_SIZE]; - - memset(result, 0, MD5_DIGEST_SIZE); - /* If there is no digest in suspend kernel, let it go. */ - if (!memcmp(result, buf, MD5_DIGEST_SIZE)) - return false; - - ret = get_e820_md5(e820_table_firmware, result); - if (ret) - return true; - - return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false; -} -#else -static void hibernation_e820_save(void *buf) -{ -} - -static bool hibernation_e820_mismatch(void *buf) -{ - /* If md5 is not builtin for restore kernel, let it go. */ - return false; -} -#endif - -/** - * arch_hibernation_header_save - populate the architecture specific part - * of a hibernation image header - * @addr: address to save the data at - */ -int arch_hibernation_header_save(void *addr, unsigned int max_size) -{ - struct restore_data_record *rdr = addr; - - if (max_size < sizeof(struct restore_data_record)) - return -EOVERFLOW; - rdr->jump_address = (unsigned long)restore_registers; - rdr->jump_address_phys = __pa_symbol(restore_registers); - - /* - * The restore code fixes up CR3 and CR4 in the following sequence: - * - * [in hibernation asm] - * 1. CR3 <= temporary page tables - * 2. CR4 <= mmu_cr4_features (from the kernel that restores us) - * 3. CR3 <= rdr->cr3 - * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel) - * [in restore_processor_state()] - * 5. CR4 <= saved CR4 - * 6. CR3 <= saved CR3 - * - * Our mmu_cr4_features has CR4.PCIDE=0, and toggling - * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so - * rdr->cr3 needs to point to valid page tables but must not - * have any of the PCID bits set. - */ - rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK; - - rdr->magic = RESTORE_MAGIC; - - hibernation_e820_save(rdr->e820_digest); - - return 0; -} - -/** - * arch_hibernation_header_restore - read the architecture specific data - * from the hibernation image header - * @addr: address to read the data from - */ -int arch_hibernation_header_restore(void *addr) -{ - struct restore_data_record *rdr = addr; - - restore_jump_address = rdr->jump_address; - jump_address_phys = rdr->jump_address_phys; - restore_cr3 = rdr->cr3; - - if (rdr->magic != RESTORE_MAGIC) { - pr_crit("Unrecognized hibernate image header format!\n"); - return -EINVAL; - } - - if (hibernation_e820_mismatch(rdr->e820_digest)) { - pr_crit("Hibernate inconsistent memory map detected!\n"); - return -ENODEV; - } - - return 0; -} diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index 6e56815e13a0aa..6fe383002125f6 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S @@ -12,6 +12,7 @@ #include #include #include +#include .text @@ -24,13 +25,30 @@ ENTRY(swsusp_arch_suspend) pushfl popl saved_context_eflags + /* save cr3 */ + movl %cr3, %eax + movl %eax, restore_cr3 + + FRAME_BEGIN call swsusp_save + FRAME_END ret +ENDPROC(swsusp_arch_suspend) ENTRY(restore_image) + /* prepare to jump to the image kernel */ + movl restore_jump_address, %ebx + movl restore_cr3, %ebp + movl mmu_cr4_features, %ecx - movl resume_pg_dir, %eax - subl $__PAGE_OFFSET, %eax + + /* jump to relocated restore code */ + movl relocated_restore_code, %eax + jmpl *%eax + +/* code below has been relocated to a safe page */ +ENTRY(core_restore_code) + movl temp_pgt, %eax movl %eax, %cr3 jecxz 1f # cr4 Pentium and higher, skip if zero @@ -49,7 +67,7 @@ copy_loop: movl pbe_address(%edx), %esi movl pbe_orig_address(%edx), %edi - movl $1024, %ecx + movl $(PAGE_SIZE >> 2), %ecx rep movsl @@ -58,10 +76,13 @@ copy_loop: .p2align 4,,7 done: + jmpl *%ebx + + /* code below belongs to the image kernel */ + .align PAGE_SIZE +ENTRY(restore_registers) /* go back to the original page tables */ - movl $swapper_pg_dir, %eax - subl $__PAGE_OFFSET, %eax - movl %eax, %cr3 + movl %ebp, %cr3 movl mmu_cr4_features, %ecx jecxz 1f # cr4 Pentium and higher, skip if zero movl %ecx, %cr4; # turn PGE back on @@ -82,4 +103,8 @@ done: xorl %eax, %eax + /* tell the hibernation core that we've just restored the memory */ + movl %eax, in_suspend + ret +ENDPROC(restore_registers) diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index fd369a6e9ff8ce..3008baa2fa9501 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -59,7 +59,7 @@ ENTRY(restore_image) movq restore_cr3(%rip), %r9 /* prepare to switch to temporary page tables */ - movq temp_level4_pgt(%rip), %rax + movq temp_pgt(%rip), %rax movq mmu_cr4_features(%rip), %rbx /* prepare to copy image data to their original locations */ diff --git a/drivers/acpi/acpi_lpit.c b/drivers/acpi/acpi_lpit.c index cf4fc016116415..e43cb71b697266 100644 --- a/drivers/acpi/acpi_lpit.c +++ b/drivers/acpi/acpi_lpit.c @@ -117,11 +117,17 @@ static void lpit_update_residency(struct lpit_residency_info *info, if (!info->iomem_addr) return; + if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) + return; + /* Silently fail, if cpuidle attribute group is not present */ sysfs_add_file_to_group(&cpu_subsys.dev_root->kobj, &dev_attr_low_power_idle_system_residency_us.attr, "cpuidle"); } else if (info->gaddr.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) { + if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) + return; + /* Silently fail, if cpuidle attribute group is not present */ sysfs_add_file_to_group(&cpu_subsys.dev_root->kobj, &dev_attr_low_power_idle_cpu_residency_us.attr, diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index d9ce4b162e2ce0..217a782c3e5527 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1061,9 +1061,9 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); struct cpc_register_resource *highest_reg, *lowest_reg, - *lowest_non_linear_reg, *nominal_reg, + *lowest_non_linear_reg, *nominal_reg, *guaranteed_reg, *low_freq_reg = NULL, *nom_freq_reg = NULL; - u64 high, low, nom, min_nonlinear, low_f = 0, nom_f = 0; + u64 high, low, guaranteed, nom, min_nonlinear, low_f = 0, nom_f = 0; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); struct cppc_pcc_data *pcc_ss_data = NULL; int ret = 0, regs_in_pcc = 0; @@ -1079,6 +1079,7 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) nominal_reg = &cpc_desc->cpc_regs[NOMINAL_PERF]; low_freq_reg = &cpc_desc->cpc_regs[LOWEST_FREQ]; nom_freq_reg = &cpc_desc->cpc_regs[NOMINAL_FREQ]; + guaranteed_reg = &cpc_desc->cpc_regs[GUARANTEED_PERF]; /* Are any of the regs PCC ?*/ if (CPC_IN_PCC(highest_reg) || CPC_IN_PCC(lowest_reg) || @@ -1107,6 +1108,9 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) cpc_read(cpunum, nominal_reg, &nom); perf_caps->nominal_perf = nom; + cpc_read(cpunum, guaranteed_reg, &guaranteed); + perf_caps->guaranteed_perf = guaranteed; + cpc_read(cpunum, lowest_non_linear_reg, &min_nonlinear); perf_caps->lowest_nonlinear_perf = min_nonlinear; diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 4b571419949080..7f38a92b444a9a 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -467,6 +467,10 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, return -EAGAIN; } + /* Default to shallowest state. */ + if (!genpd->gov) + genpd->state_idx = 0; + if (genpd->power_off) { int ret; @@ -1687,6 +1691,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd, ret = genpd_set_default_power_state(genpd); if (ret) return ret; + } else if (!gov) { + pr_warn("%s : no governor for states\n", genpd->name); } device_initialize(&genpd->dev); @@ -2478,8 +2484,8 @@ static int genpd_iterate_idle_states(struct device_node *dn, * * Returns the device states parsed from the OF node. The memory for the states * is allocated by this function and is the responsibility of the caller to - * free the memory after use. If no domain idle states is found it returns - * -EINVAL and in case of errors, a negative error code. + * free the memory after use. If any or zero compatible domain idle states is + * found it returns 0 and in case of errors, a negative error code is returned. */ int of_genpd_parse_idle_states(struct device_node *dn, struct genpd_power_state **states, int *n) @@ -2488,8 +2494,14 @@ int of_genpd_parse_idle_states(struct device_node *dn, int ret; ret = genpd_iterate_idle_states(dn, NULL); - if (ret <= 0) - return ret < 0 ? ret : -EINVAL; + if (ret < 0) + return ret; + + if (!ret) { + *states = NULL; + *n = 0; + return 0; + } st = kcalloc(ret, sizeof(*st), GFP_KERNEL); if (!st) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 30f3021497304a..fd25c21cee72f4 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -428,7 +428,7 @@ MODULE_LICENSE("GPL"); late_initcall(cppc_cpufreq_init); -static const struct acpi_device_id cppc_acpi_ids[] = { +static const struct acpi_device_id cppc_acpi_ids[] __used = { {ACPI_PROCESSOR_DEVICE_HID, }, {} }; diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index fe14c57de6cabc..b1c5468dca16b3 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -58,6 +58,7 @@ static const struct of_device_id whitelist[] __initconst = { { .compatible = "renesas,r8a73a4", }, { .compatible = "renesas,r8a7740", }, { .compatible = "renesas,r8a7743", }, + { .compatible = "renesas,r8a7744", }, { .compatible = "renesas,r8a7745", }, { .compatible = "renesas,r8a7778", }, { .compatible = "renesas,r8a7779", }, @@ -78,7 +79,10 @@ static const struct of_device_id whitelist[] __initconst = { { .compatible = "rockchip,rk3328", }, { .compatible = "rockchip,rk3366", }, { .compatible = "rockchip,rk3368", }, - { .compatible = "rockchip,rk3399", }, + { .compatible = "rockchip,rk3399", + .data = &(struct cpufreq_dt_platform_data) + { .have_governor_per_policy = true, }, + }, { .compatible = "st-ericsson,u8500", }, { .compatible = "st-ericsson,u8540", }, diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 0a9ebf00be468e..e58bfcb1169ebb 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -32,6 +32,7 @@ struct private_data { struct device *cpu_dev; struct thermal_cooling_device *cdev; const char *reg_name; + bool have_static_opps; }; static struct freq_attr *cpufreq_dt_attr[] = { @@ -204,6 +205,15 @@ static int cpufreq_init(struct cpufreq_policy *policy) } } + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto out_put_regulator; + } + + priv->reg_name = name; + priv->opp_table = opp_table; + /* * Initialize OPP tables for all policy->cpus. They will be shared by * all CPUs which have marked their CPUs shared with OPP bindings. @@ -214,7 +224,8 @@ static int cpufreq_init(struct cpufreq_policy *policy) * * OPPs might be populated at runtime, don't check for error here */ - dev_pm_opp_of_cpumask_add_table(policy->cpus); + if (!dev_pm_opp_of_cpumask_add_table(policy->cpus)) + priv->have_static_opps = true; /* * But we need OPP table to function so if it is not there let's @@ -240,19 +251,10 @@ static int cpufreq_init(struct cpufreq_policy *policy) __func__, ret); } - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) { - ret = -ENOMEM; - goto out_free_opp; - } - - priv->reg_name = name; - priv->opp_table = opp_table; - ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); - goto out_free_priv; + goto out_free_opp; } priv->cpu_dev = cpu_dev; @@ -282,10 +284,11 @@ static int cpufreq_init(struct cpufreq_policy *policy) out_free_cpufreq_table: dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); -out_free_priv: - kfree(priv); out_free_opp: - dev_pm_opp_of_cpumask_remove_table(policy->cpus); + if (priv->have_static_opps) + dev_pm_opp_of_cpumask_remove_table(policy->cpus); + kfree(priv); +out_put_regulator: if (name) dev_pm_opp_put_regulators(opp_table); out_put_clk: @@ -300,7 +303,8 @@ static int cpufreq_exit(struct cpufreq_policy *policy) cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); - dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); + if (priv->have_static_opps) + dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); if (priv->reg_name) dev_pm_opp_put_regulators(priv->opp_table); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index f53fb41efb7bf7..7aa3dcad21758c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -403,7 +403,7 @@ EXPORT_SYMBOL_GPL(cpufreq_freq_transition_begin); void cpufreq_freq_transition_end(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, int transition_failed) { - if (unlikely(WARN_ON(!policy->transition_ongoing))) + if (WARN_ON(!policy->transition_ongoing)) return; cpufreq_notify_post_transition(policy, freqs, transition_failed); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index f20f20a77d4d37..4268f87e99fcfb 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -80,8 +80,10 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy) * changed in the meantime, so fall back to current frequency in that * case. */ - if (requested_freq > policy->max || requested_freq < policy->min) + if (requested_freq > policy->max || requested_freq < policy->min) { requested_freq = policy->cur; + dbs_info->requested_freq = requested_freq; + } freq_step = get_freq_step(cs_tuners, policy); @@ -92,7 +94,7 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy) if (policy_dbs->idle_periods < UINT_MAX) { unsigned int freq_steps = policy_dbs->idle_periods * freq_step; - if (requested_freq > freq_steps) + if (requested_freq > policy->min + freq_steps) requested_freq -= freq_steps; else requested_freq = policy->min; diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index b2ff423ad7f82f..8cfee0ab804b43 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -290,20 +291,32 @@ static void imx6q_opp_check_speed_grading(struct device *dev) #define OCOTP_CFG3_6ULL_SPEED_792MHZ 0x2 #define OCOTP_CFG3_6ULL_SPEED_900MHZ 0x3 -static void imx6ul_opp_check_speed_grading(struct device *dev) +static int imx6ul_opp_check_speed_grading(struct device *dev) { - struct device_node *np; - void __iomem *base; u32 val; + int ret = 0; - np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-ocotp"); - if (!np) - return; + if (of_find_property(dev->of_node, "nvmem-cells", NULL)) { + ret = nvmem_cell_read_u32(dev, "speed_grade", &val); + if (ret) + return ret; + } else { + struct device_node *np; + void __iomem *base; + + np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-ocotp"); + if (!np) + return -ENOENT; + + base = of_iomap(np, 0); + of_node_put(np); + if (!base) { + dev_err(dev, "failed to map ocotp\n"); + return -EFAULT; + } - base = of_iomap(np, 0); - if (!base) { - dev_err(dev, "failed to map ocotp\n"); - goto put_node; + val = readl_relaxed(base + OCOTP_CFG3); + iounmap(base); } /* @@ -314,7 +327,6 @@ static void imx6ul_opp_check_speed_grading(struct device *dev) * 2b'11: 900000000Hz on i.MX6ULL only; * We need to set the max speed of ARM according to fuse map. */ - val = readl_relaxed(base + OCOTP_CFG3); val >>= OCOTP_CFG3_SPEED_SHIFT; val &= 0x3; @@ -334,9 +346,7 @@ static void imx6ul_opp_check_speed_grading(struct device *dev) dev_warn(dev, "failed to disable 900MHz OPP\n"); } - iounmap(base); -put_node: - of_node_put(np); + return ret; } static int imx6q_cpufreq_probe(struct platform_device *pdev) @@ -394,10 +404,18 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) } if (of_machine_is_compatible("fsl,imx6ul") || - of_machine_is_compatible("fsl,imx6ull")) - imx6ul_opp_check_speed_grading(cpu_dev); - else + of_machine_is_compatible("fsl,imx6ull")) { + ret = imx6ul_opp_check_speed_grading(cpu_dev); + if (ret == -EPROBE_DEFER) + return ret; + if (ret) { + dev_err(cpu_dev, "failed to read ocotp: %d\n", + ret); + return ret; + } + } else { imx6q_opp_check_speed_grading(cpu_dev); + } /* Because we have added the OPPs here, we must free them */ free_opp = true; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b6a1aadaff9f35..2a99e2fd941246 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -373,10 +373,28 @@ static void intel_pstate_set_itmt_prio(int cpu) } } } + +static int intel_pstate_get_cppc_guranteed(int cpu) +{ + struct cppc_perf_caps cppc_perf; + int ret; + + ret = cppc_get_perf_caps(cpu, &cppc_perf); + if (ret) + return ret; + + return cppc_perf.guaranteed_perf; +} + #else static void intel_pstate_set_itmt_prio(int cpu) { } + +static int intel_pstate_get_cppc_guranteed(int cpu) +{ + return -ENOTSUPP; +} #endif static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) @@ -699,9 +717,29 @@ static ssize_t show_energy_performance_preference( cpufreq_freq_attr_rw(energy_performance_preference); +static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf) +{ + struct cpudata *cpu; + u64 cap; + int ratio; + + ratio = intel_pstate_get_cppc_guranteed(policy->cpu); + if (ratio <= 0) { + rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap); + ratio = HWP_GUARANTEED_PERF(cap); + } + + cpu = all_cpu_data[policy->cpu]; + + return sprintf(buf, "%d\n", ratio * cpu->pstate.scaling); +} + +cpufreq_freq_attr_ro(base_frequency); + static struct freq_attr *hwp_cpufreq_attrs[] = { &energy_performance_preference, &energy_performance_available_preferences, + &base_frequency, NULL, }; diff --git a/drivers/cpufreq/mvebu-cpufreq.c b/drivers/cpufreq/mvebu-cpufreq.c index 31513bd427054b..6d33a639f902a9 100644 --- a/drivers/cpufreq/mvebu-cpufreq.c +++ b/drivers/cpufreq/mvebu-cpufreq.c @@ -84,9 +84,10 @@ static int __init armada_xp_pmsu_cpufreq_init(void) ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk) / 2, 0); if (ret) { + dev_pm_opp_remove(cpu_dev, clk_get_rate(clk)); clk_put(clk); dev_err(cpu_dev, "Failed to register OPPs\n"); - goto opp_register_failed; + return ret; } ret = dev_pm_opp_set_sharing_cpus(cpu_dev, @@ -99,11 +100,5 @@ static int __init armada_xp_pmsu_cpufreq_init(void) platform_device_register_simple("cpufreq-dt", -1, NULL, 0); return 0; - -opp_register_failed: - /* As registering has failed remove all the opp for all cpus */ - dev_pm_opp_cpumask_remove_table(cpu_possible_mask); - - return ret; } device_initcall(armada_xp_pmsu_cpufreq_init); diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 5d31c2db12a3f3..dbecd7667db28a 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -611,8 +611,8 @@ static int s5pv210_cpufreq_probe(struct platform_device *pdev) for_each_compatible_node(np, NULL, "samsung,s5pv210-dmc") { id = of_alias_get_id(np, "dmc"); if (id < 0 || id >= ARRAY_SIZE(dmc_base)) { - pr_err("%s: failed to get alias of dmc node '%s'\n", - __func__, np->name); + pr_err("%s: failed to get alias of dmc node '%pOFn'\n", + __func__, np); of_node_put(np); return id; } diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c index 1f59966573aa19..f1e09022b81944 100644 --- a/drivers/cpufreq/tegra186-cpufreq.c +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -121,7 +121,7 @@ static struct cpufreq_frequency_table *init_vhint_table( void *virt; virt = dma_alloc_coherent(bpmp->dev, sizeof(*data), &phys, - GFP_KERNEL | GFP_DMA32); + GFP_KERNEL); if (!virt) return ERR_PTR(-ENOMEM); diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 6df894d65d9e27..4a97446f66d836 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -247,17 +247,17 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, if (!cpuidle_state_is_coupled(drv, index)) local_irq_enable(); - diff = ktime_us_delta(time_end, time_start); - if (diff > INT_MAX) - diff = INT_MAX; - - dev->last_residency = (int) diff; - if (entered_state >= 0) { - /* Update cpuidle counters */ - /* This can be moved to within driver enter routine + /* + * Update cpuidle counters + * This can be moved to within driver enter routine, * but that results in multiple copies of same code. */ + diff = ktime_us_delta(time_end, time_start); + if (diff > INT_MAX) + diff = INT_MAX; + + dev->last_residency = (int)diff; dev->states_usage[entered_state].time += dev->last_residency; dev->states_usage[entered_state].usage++; } else { diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 704880a6612a76..f0dddc66af2608 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -80,7 +80,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, last_state = &ldev->states[last_idx]; - last_residency = cpuidle_get_last_residency(dev) - drv->states[last_idx].exit_latency; + last_residency = dev->last_residency - drv->states[last_idx].exit_latency; /* consider promotion */ if (last_idx < drv->state_count - 1 && diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index e26a40971b263e..575a68f31761f9 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -124,7 +124,6 @@ struct menu_device { int tick_wakeup; unsigned int next_timer_us; - unsigned int predicted_us; unsigned int bucket; unsigned int correction_factor[BUCKETS]; unsigned int intervals[INTERVALS]; @@ -197,10 +196,11 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); * of points is below a threshold. If it is... then use the * average of these 8 points as the estimated value. */ -static unsigned int get_typical_interval(struct menu_device *data) +static unsigned int get_typical_interval(struct menu_device *data, + unsigned int predicted_us) { int i, divisor; - unsigned int max, thresh, avg; + unsigned int min, max, thresh, avg; uint64_t sum, variance; thresh = UINT_MAX; /* Discard outliers above this value */ @@ -208,6 +208,7 @@ static unsigned int get_typical_interval(struct menu_device *data) again: /* First calculate the average of past intervals */ + min = UINT_MAX; max = 0; sum = 0; divisor = 0; @@ -218,8 +219,19 @@ static unsigned int get_typical_interval(struct menu_device *data) divisor++; if (value > max) max = value; + + if (value < min) + min = value; } } + + /* + * If the result of the computation is going to be discarded anyway, + * avoid the computation altogether. + */ + if (min >= predicted_us) + return UINT_MAX; + if (divisor == INTERVALS) avg = sum >> INTERVAL_SHIFT; else @@ -286,10 +298,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, struct menu_device *data = this_cpu_ptr(&menu_devices); int latency_req = cpuidle_governor_latency_req(dev->cpu); int i; - int first_idx; int idx; unsigned int interactivity_req; - unsigned int expected_interval; + unsigned int predicted_us; unsigned long nr_iowaiters, cpu_load; ktime_t delta_next; @@ -298,50 +309,36 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, data->needs_update = 0; } - /* Special case when user has set very strict latency requirement */ - if (unlikely(latency_req == 0)) { - *stop_tick = false; - return 0; - } - /* determine the expected residency time, round up */ data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next)); get_iowait_load(&nr_iowaiters, &cpu_load); data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); + if (unlikely(drv->state_count <= 1 || latency_req == 0) || + ((data->next_timer_us < drv->states[1].target_residency || + latency_req < drv->states[1].exit_latency) && + !drv->states[0].disabled && !dev->states_usage[0].disable)) { + /* + * In this case state[0] will be used no matter what, so return + * it right away and keep the tick running. + */ + *stop_tick = false; + return 0; + } + /* * Force the result of multiplication to be 64 bits even if both * operands are 32 bits. * Make sure to round up for half microseconds. */ - data->predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us * + predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us * data->correction_factor[data->bucket], RESOLUTION * DECAY); - - expected_interval = get_typical_interval(data); - expected_interval = min(expected_interval, data->next_timer_us); - - first_idx = 0; - if (drv->states[0].flags & CPUIDLE_FLAG_POLLING) { - struct cpuidle_state *s = &drv->states[1]; - unsigned int polling_threshold; - - /* - * Default to a physical idle state, not to busy polling, unless - * a timer is going to trigger really really soon. - */ - polling_threshold = max_t(unsigned int, 20, s->target_residency); - if (data->next_timer_us > polling_threshold && - latency_req > s->exit_latency && !s->disabled && - !dev->states_usage[1].disable) - first_idx = 1; - } - /* * Use the lowest expected idle interval to pick the idle state. */ - data->predicted_us = min(data->predicted_us, expected_interval); + predicted_us = min(predicted_us, get_typical_interval(data, predicted_us)); if (tick_nohz_tick_stopped()) { /* @@ -352,34 +349,46 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * the known time till the closest timer event for the idle * state selection. */ - if (data->predicted_us < TICK_USEC) - data->predicted_us = ktime_to_us(delta_next); + if (predicted_us < TICK_USEC) + predicted_us = ktime_to_us(delta_next); } else { /* * Use the performance multiplier and the user-configurable * latency_req to determine the maximum exit latency. */ - interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load); + interactivity_req = predicted_us / performance_multiplier(nr_iowaiters, cpu_load); if (latency_req > interactivity_req) latency_req = interactivity_req; } - expected_interval = data->predicted_us; /* * Find the idle state with the lowest power while satisfying * our constraints. */ idx = -1; - for (i = first_idx; i < drv->state_count; i++) { + for (i = 0; i < drv->state_count; i++) { struct cpuidle_state *s = &drv->states[i]; struct cpuidle_state_usage *su = &dev->states_usage[i]; if (s->disabled || su->disable) continue; + if (idx == -1) idx = i; /* first enabled state */ - if (s->target_residency > data->predicted_us) { - if (data->predicted_us < TICK_USEC) + + if (s->target_residency > predicted_us) { + /* + * Use a physical idle state, not busy polling, unless + * a timer is going to trigger soon enough. + */ + if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && + s->exit_latency <= latency_req && + s->target_residency <= data->next_timer_us) { + predicted_us = s->target_residency; + idx = i; + break; + } + if (predicted_us < TICK_USEC) break; if (!tick_nohz_tick_stopped()) { @@ -389,7 +398,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * tick in that case and let the governor run * again in the next iteration of the loop. */ - expected_interval = drv->states[idx].target_residency; + predicted_us = drv->states[idx].target_residency; break; } @@ -403,7 +412,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, s->target_residency <= ktime_to_us(delta_next)) idx = i; - goto out; + return idx; } if (s->exit_latency > latency_req) { /* @@ -412,7 +421,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * expected idle duration so that the tick is retained * as long as that target residency is low enough. */ - expected_interval = drv->states[idx].target_residency; + predicted_us = drv->states[idx].target_residency; break; } idx = i; @@ -426,7 +435,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * expected idle duration is shorter than the tick period length. */ if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) || - expected_interval < TICK_USEC) && !tick_nohz_tick_stopped()) { + predicted_us < TICK_USEC) && !tick_nohz_tick_stopped()) { unsigned int delta_next_us = ktime_to_us(delta_next); *stop_tick = false; @@ -450,10 +459,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, } } -out: - data->last_state_idx = idx; - - return data->last_state_idx; + return idx; } /** @@ -512,9 +518,19 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * duration predictor do a better job next time. */ measured_us = 9 * MAX_INTERESTING / 10; + } else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) && + dev->poll_time_limit) { + /* + * The CPU exited the "polling" state due to a time limit, so + * the idle duration prediction leading to the selection of that + * state was inaccurate. If a better prediction had been made, + * the CPU might have been woken up from idle by the next timer. + * Assume that to be the case. + */ + measured_us = data->next_timer_us; } else { /* measured value */ - measured_us = cpuidle_get_last_residency(dev); + measured_us = dev->last_residency; /* Deduct exit latency */ if (measured_us > 2 * target->exit_latency) diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c index 3f86d23c592ec0..85792d371add65 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -9,7 +9,6 @@ #include #include -#define POLL_IDLE_TIME_LIMIT (TICK_NSEC / 16) #define POLL_IDLE_RELAX_COUNT 200 static int __cpuidle poll_idle(struct cpuidle_device *dev, @@ -17,8 +16,11 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev, { u64 time_start = local_clock(); + dev->poll_time_limit = false; + local_irq_enable(); if (!current_set_polling_and_test()) { + u64 limit = (u64)drv->states[1].target_residency * NSEC_PER_USEC; unsigned int loop_count = 0; while (!need_resched()) { @@ -27,8 +29,10 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev, continue; loop_count = 0; - if (local_clock() - time_start > POLL_IDLE_TIME_LIMIT) + if (local_clock() - time_start > limit) { + dev->poll_time_limit = true; break; + } } } current_clr_polling(); diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 4c49bb1330b527..141413067b5ce2 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -28,9 +29,6 @@ #include #include "governor.h" -#define MAX(a,b) ((a > b) ? a : b) -#define MIN(a,b) ((a < b) ? a : b) - static struct class *devfreq_class; /* @@ -221,6 +219,49 @@ static struct devfreq_governor *find_devfreq_governor(const char *name) return ERR_PTR(-ENODEV); } +/** + * try_then_request_governor() - Try to find the governor and request the + * module if is not found. + * @name: name of the governor + * + * Search the list of devfreq governors and request the module and try again + * if is not found. This can happen when both drivers (the governor driver + * and the driver that call devfreq_add_device) are built as modules. + * devfreq_list_lock should be held by the caller. Returns the matched + * governor's pointer. + */ +static struct devfreq_governor *try_then_request_governor(const char *name) +{ + struct devfreq_governor *governor; + int err = 0; + + if (IS_ERR_OR_NULL(name)) { + pr_err("DEVFREQ: %s: Invalid parameters\n", __func__); + return ERR_PTR(-EINVAL); + } + WARN(!mutex_is_locked(&devfreq_list_lock), + "devfreq_list_lock must be locked."); + + governor = find_devfreq_governor(name); + if (IS_ERR(governor)) { + mutex_unlock(&devfreq_list_lock); + + if (!strncmp(name, DEVFREQ_GOV_SIMPLE_ONDEMAND, + DEVFREQ_NAME_LEN)) + err = request_module("governor_%s", "simpleondemand"); + else + err = request_module("governor_%s", name); + /* Restore previous state before return */ + mutex_lock(&devfreq_list_lock); + if (err) + return NULL; + + governor = find_devfreq_governor(name); + } + + return governor; +} + static int devfreq_notify_transition(struct devfreq *devfreq, struct devfreq_freqs *freqs, unsigned int state) { @@ -280,14 +321,14 @@ int update_devfreq(struct devfreq *devfreq) * max_freq * min_freq */ - max_freq = MIN(devfreq->scaling_max_freq, devfreq->max_freq); - min_freq = MAX(devfreq->scaling_min_freq, devfreq->min_freq); + max_freq = min(devfreq->scaling_max_freq, devfreq->max_freq); + min_freq = max(devfreq->scaling_min_freq, devfreq->min_freq); - if (min_freq && freq < min_freq) { + if (freq < min_freq) { freq = min_freq; flags &= ~DEVFREQ_FLAG_LEAST_UPPER_BOUND; /* Use GLB */ } - if (max_freq && freq > max_freq) { + if (freq > max_freq) { freq = max_freq; flags |= DEVFREQ_FLAG_LEAST_UPPER_BOUND; /* Use LUB */ } @@ -534,10 +575,6 @@ static void devfreq_dev_release(struct device *dev) list_del(&devfreq->node); mutex_unlock(&devfreq_list_lock); - if (devfreq->governor) - devfreq->governor->event_handler(devfreq, - DEVFREQ_GOV_STOP, NULL); - if (devfreq->profile->exit) devfreq->profile->exit(devfreq->dev.parent); @@ -646,9 +683,8 @@ struct devfreq *devfreq_add_device(struct device *dev, mutex_unlock(&devfreq->lock); mutex_lock(&devfreq_list_lock); - list_add(&devfreq->node, &devfreq_list); - governor = find_devfreq_governor(devfreq->governor_name); + governor = try_then_request_governor(devfreq->governor_name); if (IS_ERR(governor)) { dev_err(dev, "%s: Unable to find governor for the device\n", __func__); @@ -664,19 +700,20 @@ struct devfreq *devfreq_add_device(struct device *dev, __func__); goto err_init; } + + list_add(&devfreq->node, &devfreq_list); + mutex_unlock(&devfreq_list_lock); return devfreq; err_init: - list_del(&devfreq->node); mutex_unlock(&devfreq_list_lock); - device_unregister(&devfreq->dev); + devfreq_remove_device(devfreq); devfreq = NULL; err_dev: - if (devfreq) - kfree(devfreq); + kfree(devfreq); err_out: return ERR_PTR(err); } @@ -693,6 +730,9 @@ int devfreq_remove_device(struct devfreq *devfreq) if (!devfreq) return -EINVAL; + if (devfreq->governor) + devfreq->governor->event_handler(devfreq, + DEVFREQ_GOV_STOP, NULL); device_unregister(&devfreq->dev); return 0; @@ -991,7 +1031,7 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, return -EINVAL; mutex_lock(&devfreq_list_lock); - governor = find_devfreq_governor(str_governor); + governor = try_then_request_governor(str_governor); if (IS_ERR(governor)) { ret = PTR_ERR(governor); goto out; @@ -1126,17 +1166,26 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, struct devfreq *df = to_devfreq(dev); unsigned long value; int ret; - unsigned long max; ret = sscanf(buf, "%lu", &value); if (ret != 1) return -EINVAL; mutex_lock(&df->lock); - max = df->max_freq; - if (value && max && value > max) { - ret = -EINVAL; - goto unlock; + + if (value) { + if (value > df->max_freq) { + ret = -EINVAL; + goto unlock; + } + } else { + unsigned long *freq_table = df->profile->freq_table; + + /* Get minimum frequency according to sorting order */ + if (freq_table[0] < freq_table[df->profile->max_state - 1]) + value = freq_table[0]; + else + value = freq_table[df->profile->max_state - 1]; } df->min_freq = value; @@ -1152,7 +1201,7 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, { struct devfreq *df = to_devfreq(dev); - return sprintf(buf, "%lu\n", MAX(df->scaling_min_freq, df->min_freq)); + return sprintf(buf, "%lu\n", max(df->scaling_min_freq, df->min_freq)); } static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, @@ -1161,17 +1210,26 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, struct devfreq *df = to_devfreq(dev); unsigned long value; int ret; - unsigned long min; ret = sscanf(buf, "%lu", &value); if (ret != 1) return -EINVAL; mutex_lock(&df->lock); - min = df->min_freq; - if (value && min && value < min) { - ret = -EINVAL; - goto unlock; + + if (value) { + if (value < df->min_freq) { + ret = -EINVAL; + goto unlock; + } + } else { + unsigned long *freq_table = df->profile->freq_table; + + /* Get maximum frequency according to sorting order */ + if (freq_table[0] < freq_table[df->profile->max_state - 1]) + value = freq_table[df->profile->max_state - 1]; + else + value = freq_table[0]; } df->max_freq = value; @@ -1188,7 +1246,7 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, { struct devfreq *df = to_devfreq(dev); - return sprintf(buf, "%lu\n", MIN(df->scaling_max_freq, df->max_freq)); + return sprintf(buf, "%lu\n", min(df->scaling_max_freq, df->max_freq)); } static DEVICE_ATTR_RW(max_freq); diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index a9c64f0d328466..c61de0bdf05321 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -535,8 +535,8 @@ static int of_get_devfreq_events(struct device_node *np, if (i == ARRAY_SIZE(ppmu_events)) { dev_warn(dev, - "don't know how to configure events : %s\n", - node->name); + "don't know how to configure events : %pOFn\n", + node); continue; } diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index cfc50a61a90dbd..f53339ca610fc3 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -25,6 +25,9 @@ #define DEVFREQ_GOV_SUSPEND 0x4 #define DEVFREQ_GOV_RESUME 0x5 +#define DEVFREQ_MIN_FREQ 0 +#define DEVFREQ_MAX_FREQ ULONG_MAX + /** * struct devfreq_governor - Devfreq policy governor * @node: list node - contains registered devfreq governors @@ -54,9 +57,6 @@ struct devfreq_governor { unsigned int event, void *data); }; -/* Caution: devfreq->lock must be locked before calling update_devfreq */ -extern int update_devfreq(struct devfreq *devfreq); - extern void devfreq_monitor_start(struct devfreq *devfreq); extern void devfreq_monitor_stop(struct devfreq *devfreq); extern void devfreq_monitor_suspend(struct devfreq *devfreq); diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c index 4d23ecfbd948eb..ded429fd51be7e 100644 --- a/drivers/devfreq/governor_performance.c +++ b/drivers/devfreq/governor_performance.c @@ -20,10 +20,7 @@ static int devfreq_performance_func(struct devfreq *df, * target callback should be able to get floor value as * said in devfreq.h */ - if (!df->max_freq) - *freq = UINT_MAX; - else - *freq = df->max_freq; + *freq = DEVFREQ_MAX_FREQ; return 0; } diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c index 0c42f23249ef6d..9e8897f5ac4262 100644 --- a/drivers/devfreq/governor_powersave.c +++ b/drivers/devfreq/governor_powersave.c @@ -20,7 +20,7 @@ static int devfreq_powersave_func(struct devfreq *df, * target callback should be able to get ceiling value as * said in devfreq.h */ - *freq = df->min_freq; + *freq = DEVFREQ_MIN_FREQ; return 0; } diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c index 28e0f2de7100ee..c0417f0e081e0b 100644 --- a/drivers/devfreq/governor_simpleondemand.c +++ b/drivers/devfreq/governor_simpleondemand.c @@ -27,7 +27,6 @@ static int devfreq_simple_ondemand_func(struct devfreq *df, unsigned int dfso_upthreshold = DFSO_UPTHRESHOLD; unsigned int dfso_downdifferential = DFSO_DOWNDIFFERENCTIAL; struct devfreq_simple_ondemand_data *data = df->data; - unsigned long max = (df->max_freq) ? df->max_freq : UINT_MAX; err = devfreq_update_stats(df); if (err) @@ -47,7 +46,7 @@ static int devfreq_simple_ondemand_func(struct devfreq *df, /* Assume MAX if it is going to be divided by zero */ if (stat->total_time == 0) { - *freq = max; + *freq = DEVFREQ_MAX_FREQ; return 0; } @@ -60,13 +59,13 @@ static int devfreq_simple_ondemand_func(struct devfreq *df, /* Set MAX if it's busy enough */ if (stat->busy_time * 100 > stat->total_time * dfso_upthreshold) { - *freq = max; + *freq = DEVFREQ_MAX_FREQ; return 0; } /* Set MAX if we do not know the initial frequency */ if (stat->current_frequency == 0) { - *freq = max; + *freq = DEVFREQ_MAX_FREQ; return 0; } @@ -85,11 +84,6 @@ static int devfreq_simple_ondemand_func(struct devfreq *df, b = div_u64(b, (dfso_upthreshold - dfso_downdifferential / 2)); *freq = (unsigned long) b; - if (df->min_freq && *freq < df->min_freq) - *freq = df->min_freq; - if (df->max_freq && *freq > df->max_freq) - *freq = df->max_freq; - return 0; } diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c index 080607c3f34d62..378d84c011df85 100644 --- a/drivers/devfreq/governor_userspace.c +++ b/drivers/devfreq/governor_userspace.c @@ -26,19 +26,11 @@ static int devfreq_userspace_func(struct devfreq *df, unsigned long *freq) { struct userspace_data *data = df->data; - if (data->valid) { - unsigned long adjusted_freq = data->user_frequency; - - if (df->max_freq && adjusted_freq > df->max_freq) - adjusted_freq = df->max_freq; - - if (df->min_freq && adjusted_freq < df->min_freq) - adjusted_freq = df->min_freq; - - *freq = adjusted_freq; - } else { + if (data->valid) + *freq = data->user_frequency; + else *freq = df->previous_freq; /* No user freq specified yet */ - } + return 0; } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b2ccce5fb07183..791b8a366e6eda 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1066,46 +1066,43 @@ static const struct idle_cpu idle_cpu_dnv = { .disable_promotion_to_c1e = true, }; -#define ICPU(model, cpu) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&cpu } - static const struct x86_cpu_id intel_idle_ids[] __initconst = { - ICPU(INTEL_FAM6_NEHALEM_EP, idle_cpu_nehalem), - ICPU(INTEL_FAM6_NEHALEM, idle_cpu_nehalem), - ICPU(INTEL_FAM6_NEHALEM_G, idle_cpu_nehalem), - ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem), - ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem), - ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem), - ICPU(INTEL_FAM6_ATOM_PINEVIEW, idle_cpu_atom), - ICPU(INTEL_FAM6_ATOM_LINCROFT, idle_cpu_lincroft), - ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem), - ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb), - ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb), - ICPU(INTEL_FAM6_ATOM_CEDARVIEW, idle_cpu_atom), - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, idle_cpu_byt), - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, idle_cpu_tangier), - ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht), - ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb), - ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt), - ICPU(INTEL_FAM6_HASWELL_CORE, idle_cpu_hsw), - ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw), - ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw), - ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw), - ICPU(INTEL_FAM6_ATOM_SILVERMONT2, idle_cpu_avn), - ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw), - ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw), - ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw), - ICPU(INTEL_FAM6_BROADWELL_XEON_D, idle_cpu_bdw), - ICPU(INTEL_FAM6_SKYLAKE_MOBILE, idle_cpu_skl), - ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, idle_cpu_skl), - ICPU(INTEL_FAM6_KABYLAKE_MOBILE, idle_cpu_skl), - ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, idle_cpu_skl), - ICPU(INTEL_FAM6_SKYLAKE_X, idle_cpu_skx), - ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl), - ICPU(INTEL_FAM6_XEON_PHI_KNM, idle_cpu_knl), - ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt), - ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, idle_cpu_bxt), - ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv), + INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nehalem), + INTEL_CPU_FAM6(NEHALEM, idle_cpu_nehalem), + INTEL_CPU_FAM6(NEHALEM_G, idle_cpu_nehalem), + INTEL_CPU_FAM6(WESTMERE, idle_cpu_nehalem), + INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nehalem), + INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nehalem), + INTEL_CPU_FAM6(ATOM_PINEVIEW, idle_cpu_atom), + INTEL_CPU_FAM6(ATOM_LINCROFT, idle_cpu_lincroft), + INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nehalem), + INTEL_CPU_FAM6(SANDYBRIDGE, idle_cpu_snb), + INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snb), + INTEL_CPU_FAM6(ATOM_CEDARVIEW, idle_cpu_atom), + INTEL_CPU_FAM6(ATOM_SILVERMONT1, idle_cpu_byt), + INTEL_CPU_FAM6(ATOM_MERRIFIELD, idle_cpu_tangier), + INTEL_CPU_FAM6(ATOM_AIRMONT, idle_cpu_cht), + INTEL_CPU_FAM6(IVYBRIDGE, idle_cpu_ivb), + INTEL_CPU_FAM6(IVYBRIDGE_X, idle_cpu_ivt), + INTEL_CPU_FAM6(HASWELL_CORE, idle_cpu_hsw), + INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsw), + INTEL_CPU_FAM6(HASWELL_ULT, idle_cpu_hsw), + INTEL_CPU_FAM6(HASWELL_GT3E, idle_cpu_hsw), + INTEL_CPU_FAM6(ATOM_SILVERMONT2, idle_cpu_avn), + INTEL_CPU_FAM6(BROADWELL_CORE, idle_cpu_bdw), + INTEL_CPU_FAM6(BROADWELL_GT3E, idle_cpu_bdw), + INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdw), + INTEL_CPU_FAM6(BROADWELL_XEON_D, idle_cpu_bdw), + INTEL_CPU_FAM6(SKYLAKE_MOBILE, idle_cpu_skl), + INTEL_CPU_FAM6(SKYLAKE_DESKTOP, idle_cpu_skl), + INTEL_CPU_FAM6(KABYLAKE_MOBILE, idle_cpu_skl), + INTEL_CPU_FAM6(KABYLAKE_DESKTOP, idle_cpu_skl), + INTEL_CPU_FAM6(SKYLAKE_X, idle_cpu_skx), + INTEL_CPU_FAM6(XEON_PHI_KNL, idle_cpu_knl), + INTEL_CPU_FAM6(XEON_PHI_KNM, idle_cpu_knl), + INTEL_CPU_FAM6(ATOM_GOLDMONT, idle_cpu_bxt), + INTEL_CPU_FAM6(ATOM_GEMINI_LAKE, idle_cpu_bxt), + INTEL_CPU_FAM6(ATOM_DENVERTON, idle_cpu_dnv), {} }; diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 31ff03dbeb8377..2c2df4e4fc14db 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -48,9 +48,14 @@ static struct opp_device *_find_opp_dev(const struct device *dev, static struct opp_table *_find_opp_table_unlocked(struct device *dev) { struct opp_table *opp_table; + bool found; list_for_each_entry(opp_table, &opp_tables, node) { - if (_find_opp_dev(dev, opp_table)) { + mutex_lock(&opp_table->lock); + found = !!_find_opp_dev(dev, opp_table); + mutex_unlock(&opp_table->lock); + + if (found) { _get_opp_table_kref(opp_table); return opp_table; @@ -313,7 +318,7 @@ int dev_pm_opp_get_opp_count(struct device *dev) count = PTR_ERR(opp_table); dev_dbg(dev, "%s: OPP table not found (%d)\n", __func__, count); - return 0; + return count; } count = _get_opp_count(opp_table); @@ -754,8 +759,8 @@ static void _remove_opp_dev(struct opp_device *opp_dev, kfree(opp_dev); } -struct opp_device *_add_opp_dev(const struct device *dev, - struct opp_table *opp_table) +static struct opp_device *_add_opp_dev_unlocked(const struct device *dev, + struct opp_table *opp_table) { struct opp_device *opp_dev; int ret; @@ -766,6 +771,7 @@ struct opp_device *_add_opp_dev(const struct device *dev, /* Initialize opp-dev */ opp_dev->dev = dev; + list_add(&opp_dev->node, &opp_table->dev_list); /* Create debugfs entries for the opp_table */ @@ -777,7 +783,19 @@ struct opp_device *_add_opp_dev(const struct device *dev, return opp_dev; } -static struct opp_table *_allocate_opp_table(struct device *dev) +struct opp_device *_add_opp_dev(const struct device *dev, + struct opp_table *opp_table) +{ + struct opp_device *opp_dev; + + mutex_lock(&opp_table->lock); + opp_dev = _add_opp_dev_unlocked(dev, opp_table); + mutex_unlock(&opp_table->lock); + + return opp_dev; +} + +static struct opp_table *_allocate_opp_table(struct device *dev, int index) { struct opp_table *opp_table; struct opp_device *opp_dev; @@ -791,6 +809,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev) if (!opp_table) return NULL; + mutex_init(&opp_table->lock); INIT_LIST_HEAD(&opp_table->dev_list); opp_dev = _add_opp_dev(dev, opp_table); @@ -799,7 +818,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev) return NULL; } - _of_init_opp_table(opp_table, dev); + _of_init_opp_table(opp_table, dev, index); /* Find clk for the device */ opp_table->clk = clk_get(dev, NULL); @@ -812,7 +831,6 @@ static struct opp_table *_allocate_opp_table(struct device *dev) BLOCKING_INIT_NOTIFIER_HEAD(&opp_table->head); INIT_LIST_HEAD(&opp_table->opp_list); - mutex_init(&opp_table->lock); kref_init(&opp_table->kref); /* Secure the device table modification */ @@ -825,7 +843,7 @@ void _get_opp_table_kref(struct opp_table *opp_table) kref_get(&opp_table->kref); } -struct opp_table *dev_pm_opp_get_opp_table(struct device *dev) +static struct opp_table *_opp_get_opp_table(struct device *dev, int index) { struct opp_table *opp_table; @@ -836,31 +854,56 @@ struct opp_table *dev_pm_opp_get_opp_table(struct device *dev) if (!IS_ERR(opp_table)) goto unlock; - opp_table = _allocate_opp_table(dev); + opp_table = _managed_opp(dev, index); + if (opp_table) { + if (!_add_opp_dev_unlocked(dev, opp_table)) { + dev_pm_opp_put_opp_table(opp_table); + opp_table = NULL; + } + goto unlock; + } + + opp_table = _allocate_opp_table(dev, index); unlock: mutex_unlock(&opp_table_lock); return opp_table; } + +struct opp_table *dev_pm_opp_get_opp_table(struct device *dev) +{ + return _opp_get_opp_table(dev, 0); +} EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_table); +struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev, + int index) +{ + return _opp_get_opp_table(dev, index); +} + static void _opp_table_kref_release(struct kref *kref) { struct opp_table *opp_table = container_of(kref, struct opp_table, kref); - struct opp_device *opp_dev; + struct opp_device *opp_dev, *temp; /* Release clk */ if (!IS_ERR(opp_table->clk)) clk_put(opp_table->clk); - opp_dev = list_first_entry(&opp_table->dev_list, struct opp_device, - node); + WARN_ON(!list_empty(&opp_table->opp_list)); - _remove_opp_dev(opp_dev, opp_table); + list_for_each_entry_safe(opp_dev, temp, &opp_table->dev_list, node) { + /* + * The OPP table is getting removed, drop the performance state + * constraints. + */ + if (opp_table->genpd_performance_state) + dev_pm_genpd_set_performance_state((struct device *)(opp_dev->dev), 0); - /* dev_list must be empty now */ - WARN_ON(!list_empty(&opp_table->dev_list)); + _remove_opp_dev(opp_dev, opp_table); + } mutex_destroy(&opp_table->lock); list_del(&opp_table->node); @@ -869,6 +912,33 @@ static void _opp_table_kref_release(struct kref *kref) mutex_unlock(&opp_table_lock); } +void _opp_remove_all_static(struct opp_table *opp_table) +{ + struct dev_pm_opp *opp, *tmp; + + list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) { + if (!opp->dynamic) + dev_pm_opp_put(opp); + } + + opp_table->parsed_static_opps = false; +} + +static void _opp_table_list_kref_release(struct kref *kref) +{ + struct opp_table *opp_table = container_of(kref, struct opp_table, + list_kref); + + _opp_remove_all_static(opp_table); + mutex_unlock(&opp_table_lock); +} + +void _put_opp_list_kref(struct opp_table *opp_table) +{ + kref_put_mutex(&opp_table->list_kref, _opp_table_list_kref_release, + &opp_table_lock); +} + void dev_pm_opp_put_opp_table(struct opp_table *opp_table) { kref_put_mutex(&opp_table->kref, _opp_table_kref_release, @@ -896,7 +966,6 @@ static void _opp_kref_release(struct kref *kref) kfree(opp); mutex_unlock(&opp_table->lock); - dev_pm_opp_put_opp_table(opp_table); } void dev_pm_opp_get(struct dev_pm_opp *opp) @@ -940,11 +1009,15 @@ void dev_pm_opp_remove(struct device *dev, unsigned long freq) if (found) { dev_pm_opp_put(opp); + + /* Drop the reference taken by dev_pm_opp_add() */ + dev_pm_opp_put_opp_table(opp_table); } else { dev_warn(dev, "%s: Couldn't find OPP with freq: %lu\n", __func__, freq); } + /* Drop the reference taken by _find_opp_table() */ dev_pm_opp_put_opp_table(opp_table); } EXPORT_SYMBOL_GPL(dev_pm_opp_remove); @@ -1062,9 +1135,6 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, new_opp->opp_table = opp_table; kref_init(&new_opp->kref); - /* Get a reference to the OPP table */ - _get_opp_table_kref(opp_table); - ret = opp_debug_create_one(new_opp, opp_table); if (ret) dev_err(dev, "%s: Failed to register opp to debugfs (%d)\n", @@ -1543,8 +1613,9 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) return -ENOMEM; ret = _opp_add_v1(opp_table, dev, freq, u_volt, true); + if (ret) + dev_pm_opp_put_opp_table(opp_table); - dev_pm_opp_put_opp_table(opp_table); return ret; } EXPORT_SYMBOL_GPL(dev_pm_opp_add); @@ -1707,35 +1778,7 @@ int dev_pm_opp_unregister_notifier(struct device *dev, } EXPORT_SYMBOL(dev_pm_opp_unregister_notifier); -/* - * Free OPPs either created using static entries present in DT or even the - * dynamically added entries based on remove_all param. - */ -void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev, - bool remove_all) -{ - struct dev_pm_opp *opp, *tmp; - - /* Find if opp_table manages a single device */ - if (list_is_singular(&opp_table->dev_list)) { - /* Free static OPPs */ - list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) { - if (remove_all || !opp->dynamic) - dev_pm_opp_put(opp); - } - - /* - * The OPP table is getting removed, drop the performance state - * constraints. - */ - if (opp_table->genpd_performance_state) - dev_pm_genpd_set_performance_state(dev, 0); - } else { - _remove_opp_dev(_find_opp_dev(dev, opp_table), opp_table); - } -} - -void _dev_pm_opp_find_and_remove_table(struct device *dev, bool remove_all) +void _dev_pm_opp_find_and_remove_table(struct device *dev) { struct opp_table *opp_table; @@ -1752,8 +1795,12 @@ void _dev_pm_opp_find_and_remove_table(struct device *dev, bool remove_all) return; } - _dev_pm_opp_remove_table(opp_table, dev, remove_all); + _put_opp_list_kref(opp_table); + + /* Drop reference taken by _find_opp_table() */ + dev_pm_opp_put_opp_table(opp_table); + /* Drop reference taken while the OPP table was added */ dev_pm_opp_put_opp_table(opp_table); } @@ -1766,6 +1813,6 @@ void _dev_pm_opp_find_and_remove_table(struct device *dev, bool remove_all) */ void dev_pm_opp_remove_table(struct device *dev) { - _dev_pm_opp_find_and_remove_table(dev, true); + _dev_pm_opp_find_and_remove_table(dev); } EXPORT_SYMBOL_GPL(dev_pm_opp_remove_table); diff --git a/drivers/opp/cpu.c b/drivers/opp/cpu.c index 0c091070943504..ab6d07e78945db 100644 --- a/drivers/opp/cpu.c +++ b/drivers/opp/cpu.c @@ -108,7 +108,8 @@ void dev_pm_opp_free_cpufreq_table(struct device *dev, EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table); #endif /* CONFIG_CPU_FREQ */ -void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of) +void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, + int last_cpu) { struct device *cpu_dev; int cpu; @@ -116,6 +117,9 @@ void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of) WARN_ON(cpumask_empty(cpumask)); for_each_cpu(cpu, cpumask) { + if (cpu == last_cpu) + break; + cpu_dev = get_cpu_device(cpu); if (!cpu_dev) { pr_err("%s: failed to get cpu%d device\n", __func__, @@ -123,10 +127,7 @@ void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of) continue; } - if (of) - dev_pm_opp_of_remove_table(cpu_dev); - else - dev_pm_opp_remove_table(cpu_dev); + _dev_pm_opp_find_and_remove_table(cpu_dev); } } @@ -140,7 +141,7 @@ void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of) */ void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask) { - _dev_pm_opp_cpumask_remove_table(cpumask, false); + _dev_pm_opp_cpumask_remove_table(cpumask, -1); } EXPORT_SYMBOL_GPL(dev_pm_opp_cpumask_remove_table); @@ -222,8 +223,10 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) cpumask_clear(cpumask); if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) { + mutex_lock(&opp_table->lock); list_for_each_entry(opp_dev, &opp_table->dev_list, node) cpumask_set_cpu(opp_dev->dev->id, cpumask); + mutex_unlock(&opp_table->lock); } else { cpumask_set_cpu(cpu_dev->id, cpumask); } diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 7af0ddec936bb4..5a4b47958073ee 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -23,11 +23,32 @@ #include "opp.h" -static struct opp_table *_managed_opp(const struct device_node *np) +/* + * Returns opp descriptor node for a device node, caller must + * do of_node_put(). + */ +static struct device_node *_opp_of_get_opp_desc_node(struct device_node *np, + int index) +{ + /* "operating-points-v2" can be an array for power domain providers */ + return of_parse_phandle(np, "operating-points-v2", index); +} + +/* Returns opp descriptor node for a device, caller must do of_node_put() */ +struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev) +{ + return _opp_of_get_opp_desc_node(dev->of_node, 0); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_opp_desc_node); + +struct opp_table *_managed_opp(struct device *dev, int index) { struct opp_table *opp_table, *managed_table = NULL; + struct device_node *np; - mutex_lock(&opp_table_lock); + np = _opp_of_get_opp_desc_node(dev->of_node, index); + if (!np) + return NULL; list_for_each_entry(opp_table, &opp_tables, node) { if (opp_table->np == np) { @@ -47,29 +68,45 @@ static struct opp_table *_managed_opp(const struct device_node *np) } } - mutex_unlock(&opp_table_lock); + of_node_put(np); return managed_table; } -void _of_init_opp_table(struct opp_table *opp_table, struct device *dev) +void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, + int index) { - struct device_node *np; + struct device_node *np, *opp_np; + u32 val; /* * Only required for backward compatibility with v1 bindings, but isn't * harmful for other cases. And so we do it unconditionally. */ np = of_node_get(dev->of_node); - if (np) { - u32 val; - - if (!of_property_read_u32(np, "clock-latency", &val)) - opp_table->clock_latency_ns_max = val; - of_property_read_u32(np, "voltage-tolerance", - &opp_table->voltage_tolerance_v1); - of_node_put(np); - } + if (!np) + return; + + if (!of_property_read_u32(np, "clock-latency", &val)) + opp_table->clock_latency_ns_max = val; + of_property_read_u32(np, "voltage-tolerance", + &opp_table->voltage_tolerance_v1); + + /* Get OPP table node */ + opp_np = _opp_of_get_opp_desc_node(np, index); + of_node_put(np); + + if (!opp_np) + return; + + if (of_property_read_bool(opp_np, "opp-shared")) + opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED; + else + opp_table->shared_opp = OPP_TABLE_ACCESS_EXCLUSIVE; + + opp_table->np = opp_np; + + of_node_put(opp_np); } static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table, @@ -245,26 +282,10 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, */ void dev_pm_opp_of_remove_table(struct device *dev) { - _dev_pm_opp_find_and_remove_table(dev, false); + _dev_pm_opp_find_and_remove_table(dev); } EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table); -/* Returns opp descriptor node for a device node, caller must - * do of_node_put() */ -static struct device_node *_opp_of_get_opp_desc_node(struct device_node *np, - int index) -{ - /* "operating-points-v2" can be an array for power domain providers */ - return of_parse_phandle(np, "operating-points-v2", index); -} - -/* Returns opp descriptor node for a device, caller must do of_node_put() */ -struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev) -{ - return _opp_of_get_opp_desc_node(dev->of_node, 0); -} -EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_opp_desc_node); - /** * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings) * @opp_table: OPP table @@ -276,15 +297,21 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_opp_desc_node); * removed by dev_pm_opp_remove. * * Return: - * 0 On success OR + * Valid OPP pointer: + * On success + * NULL: * Duplicate OPPs (both freq and volt are same) and opp->available - * -EEXIST Freq are same and volt are different OR + * OR if the OPP is not supported by hardware. + * ERR_PTR(-EEXIST): + * Freq are same and volt are different OR * Duplicate OPPs (both freq and volt are same) and !opp->available - * -ENOMEM Memory allocation failure - * -EINVAL Failed parsing the OPP node + * ERR_PTR(-ENOMEM): + * Memory allocation failure + * ERR_PTR(-EINVAL): + * Failed parsing the OPP node */ -static int _opp_add_static_v2(struct opp_table *opp_table, struct device *dev, - struct device_node *np) +static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table, + struct device *dev, struct device_node *np) { struct dev_pm_opp *new_opp; u64 rate = 0; @@ -294,7 +321,7 @@ static int _opp_add_static_v2(struct opp_table *opp_table, struct device *dev, new_opp = _opp_allocate(opp_table); if (!new_opp) - return -ENOMEM; + return ERR_PTR(-ENOMEM); ret = of_property_read_u64(np, "opp-hz", &rate); if (ret < 0) { @@ -369,52 +396,47 @@ static int _opp_add_static_v2(struct opp_table *opp_table, struct device *dev, * frequency/voltage list. */ blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ADD, new_opp); - return 0; + return new_opp; free_opp: _opp_free(new_opp); - return ret; + return ERR_PTR(ret); } /* Initializes OPP tables based on new bindings */ -static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) +static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) { struct device_node *np; - struct opp_table *opp_table; - int ret = 0, count = 0, pstate_count = 0; + int ret, count = 0, pstate_count = 0; struct dev_pm_opp *opp; - opp_table = _managed_opp(opp_np); - if (opp_table) { - /* OPPs are already managed */ - if (!_add_opp_dev(dev, opp_table)) - ret = -ENOMEM; - goto put_opp_table; + /* OPP table is already initialized for the device */ + if (opp_table->parsed_static_opps) { + kref_get(&opp_table->list_kref); + return 0; } - opp_table = dev_pm_opp_get_opp_table(dev); - if (!opp_table) - return -ENOMEM; + kref_init(&opp_table->list_kref); /* We have opp-table node now, iterate over it and add OPPs */ - for_each_available_child_of_node(opp_np, np) { - count++; - - ret = _opp_add_static_v2(opp_table, dev, np); - if (ret) { + for_each_available_child_of_node(opp_table->np, np) { + opp = _opp_add_static_v2(opp_table, dev, np); + if (IS_ERR(opp)) { + ret = PTR_ERR(opp); dev_err(dev, "%s: Failed to add OPP, %d\n", __func__, ret); - _dev_pm_opp_remove_table(opp_table, dev, false); of_node_put(np); - goto put_opp_table; + goto put_list_kref; + } else if (opp) { + count++; } } /* There should be one of more OPP defined */ if (WARN_ON(!count)) { ret = -ENOENT; - goto put_opp_table; + goto put_list_kref; } list_for_each_entry(opp, &opp_table->opp_list, node) @@ -425,28 +447,25 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) dev_err(dev, "Not all nodes have performance state set (%d: %d)\n", count, pstate_count); ret = -ENOENT; - goto put_opp_table; + goto put_list_kref; } if (pstate_count) opp_table->genpd_performance_state = true; - opp_table->np = opp_np; - if (of_property_read_bool(opp_np, "opp-shared")) - opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED; - else - opp_table->shared_opp = OPP_TABLE_ACCESS_EXCLUSIVE; + opp_table->parsed_static_opps = true; -put_opp_table: - dev_pm_opp_put_opp_table(opp_table); + return 0; + +put_list_kref: + _put_opp_list_kref(opp_table); return ret; } /* Initializes OPP tables based on old-deprecated bindings */ -static int _of_add_opp_table_v1(struct device *dev) +static int _of_add_opp_table_v1(struct device *dev, struct opp_table *opp_table) { - struct opp_table *opp_table; const struct property *prop; const __be32 *val; int nr, ret = 0; @@ -467,9 +486,7 @@ static int _of_add_opp_table_v1(struct device *dev) return -EINVAL; } - opp_table = dev_pm_opp_get_opp_table(dev); - if (!opp_table) - return -ENOMEM; + kref_init(&opp_table->list_kref); val = prop->value; while (nr) { @@ -480,13 +497,12 @@ static int _of_add_opp_table_v1(struct device *dev) if (ret) { dev_err(dev, "%s: Failed to add OPP %ld (%d)\n", __func__, freq, ret); - _dev_pm_opp_remove_table(opp_table, dev, false); - break; + _put_opp_list_kref(opp_table); + return ret; } nr -= 2; } - dev_pm_opp_put_opp_table(opp_table); return ret; } @@ -509,24 +525,24 @@ static int _of_add_opp_table_v1(struct device *dev) */ int dev_pm_opp_of_add_table(struct device *dev) { - struct device_node *opp_np; + struct opp_table *opp_table; int ret; + opp_table = dev_pm_opp_get_opp_table_indexed(dev, 0); + if (!opp_table) + return -ENOMEM; + /* - * OPPs have two version of bindings now. The older one is deprecated, - * try for the new binding first. + * OPPs have two version of bindings now. Also try the old (v1) + * bindings for backward compatibility with older dtbs. */ - opp_np = dev_pm_opp_of_get_opp_desc_node(dev); - if (!opp_np) { - /* - * Try old-deprecated bindings for backward compatibility with - * older dtbs. - */ - return _of_add_opp_table_v1(dev); - } + if (opp_table->np) + ret = _of_add_opp_table_v2(dev, opp_table); + else + ret = _of_add_opp_table_v1(dev, opp_table); - ret = _of_add_opp_table_v2(dev, opp_np); - of_node_put(opp_np); + if (ret) + dev_pm_opp_put_opp_table(opp_table); return ret; } @@ -553,28 +569,29 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table); */ int dev_pm_opp_of_add_table_indexed(struct device *dev, int index) { - struct device_node *opp_np; + struct opp_table *opp_table; int ret, count; -again: - opp_np = _opp_of_get_opp_desc_node(dev->of_node, index); - if (!opp_np) { + if (index) { /* * If only one phandle is present, then the same OPP table * applies for all index requests. */ count = of_count_phandle_with_args(dev->of_node, "operating-points-v2", NULL); - if (count == 1 && index) { - index = 0; - goto again; - } + if (count != 1) + return -ENODEV; - return -ENODEV; + index = 0; } - ret = _of_add_opp_table_v2(dev, opp_np); - of_node_put(opp_np); + opp_table = dev_pm_opp_get_opp_table_indexed(dev, index); + if (!opp_table) + return -ENOMEM; + + ret = _of_add_opp_table_v2(dev, opp_table); + if (ret) + dev_pm_opp_put_opp_table(opp_table); return ret; } @@ -591,7 +608,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table_indexed); */ void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask) { - _dev_pm_opp_cpumask_remove_table(cpumask, true); + _dev_pm_opp_cpumask_remove_table(cpumask, -1); } EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table); @@ -604,16 +621,18 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table); int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask) { struct device *cpu_dev; - int cpu, ret = 0; + int cpu, ret; - WARN_ON(cpumask_empty(cpumask)); + if (WARN_ON(cpumask_empty(cpumask))) + return -ENODEV; for_each_cpu(cpu, cpumask) { cpu_dev = get_cpu_device(cpu); if (!cpu_dev) { pr_err("%s: failed to get cpu%d device\n", __func__, cpu); - continue; + ret = -ENODEV; + goto remove_table; } ret = dev_pm_opp_of_add_table(cpu_dev); @@ -625,12 +644,16 @@ int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask) pr_debug("%s: couldn't find opp table for cpu:%d, %d\n", __func__, cpu, ret); - /* Free all other OPPs */ - dev_pm_opp_of_cpumask_remove_table(cpumask); - break; + goto remove_table; } } + return 0; + +remove_table: + /* Free all other OPPs */ + _dev_pm_opp_cpumask_remove_table(cpumask, cpu); + return ret; } EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table); diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 7c540fd063b2dc..9c6544b4f4f901 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -126,9 +126,11 @@ enum opp_table_access { * @dev_list: list of devices that share these OPPs * @opp_list: table of opps * @kref: for reference count of the table. - * @lock: mutex protecting the opp_list. + * @list_kref: for reference count of the OPP list. + * @lock: mutex protecting the opp_list and dev_list. * @np: struct device_node pointer for opp's DT node. * @clock_latency_ns_max: Max clock latency in nanoseconds. + * @parsed_static_opps: True if OPPs are initialized from DT. * @shared_opp: OPP is shared between multiple devices. * @suspend_opp: Pointer to OPP to be used during device suspend. * @supported_hw: Array of version number to support. @@ -156,6 +158,7 @@ struct opp_table { struct list_head dev_list; struct list_head opp_list; struct kref kref; + struct kref list_kref; struct mutex lock; struct device_node *np; @@ -164,6 +167,7 @@ struct opp_table { /* For backward compatibility with v1 bindings */ unsigned int voltage_tolerance_v1; + bool parsed_static_opps; enum opp_table_access shared_opp; struct dev_pm_opp *suspend_opp; @@ -186,23 +190,26 @@ struct opp_table { /* Routines internal to opp core */ void dev_pm_opp_get(struct dev_pm_opp *opp); +void _opp_remove_all_static(struct opp_table *opp_table); void _get_opp_table_kref(struct opp_table *opp_table); int _get_opp_count(struct opp_table *opp_table); struct opp_table *_find_opp_table(struct device *dev); struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table); -void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev, bool remove_all); -void _dev_pm_opp_find_and_remove_table(struct device *dev, bool remove_all); +void _dev_pm_opp_find_and_remove_table(struct device *dev); struct dev_pm_opp *_opp_allocate(struct opp_table *opp_table); void _opp_free(struct dev_pm_opp *opp); int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table, bool rate_not_available); int _opp_add_v1(struct opp_table *opp_table, struct device *dev, unsigned long freq, long u_volt, bool dynamic); -void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of); +void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, int last_cpu); struct opp_table *_add_opp_table(struct device *dev); +void _put_opp_list_kref(struct opp_table *opp_table); #ifdef CONFIG_OF -void _of_init_opp_table(struct opp_table *opp_table, struct device *dev); +void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, int index); +struct opp_table *_managed_opp(struct device *dev, int index); #else -static inline void _of_init_opp_table(struct opp_table *opp_table, struct device *dev) {} +static inline void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, int index) {} +static inline struct opp_table *_managed_opp(struct device *dev, int index) { return NULL; } #endif #ifdef CONFIG_DEBUG_FS diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 295d8dcba48cdd..bb92874b1175ec 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -1133,47 +1133,40 @@ static const struct rapl_defaults rapl_defaults_cht = { .compute_time_window = rapl_compute_time_window_atom, }; -#define RAPL_CPU(_model, _ops) { \ - .vendor = X86_VENDOR_INTEL, \ - .family = 6, \ - .model = _model, \ - .driver_data = (kernel_ulong_t)&_ops, \ - } - static const struct x86_cpu_id rapl_ids[] __initconst = { - RAPL_CPU(INTEL_FAM6_SANDYBRIDGE, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_SANDYBRIDGE_X, rapl_defaults_core), - - RAPL_CPU(INTEL_FAM6_IVYBRIDGE, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_IVYBRIDGE_X, rapl_defaults_core), - - RAPL_CPU(INTEL_FAM6_HASWELL_CORE, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_HASWELL_ULT, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_HASWELL_GT3E, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_HASWELL_X, rapl_defaults_hsw_server), - - RAPL_CPU(INTEL_FAM6_BROADWELL_CORE, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_BROADWELL_GT3E, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_BROADWELL_XEON_D, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_BROADWELL_X, rapl_defaults_hsw_server), - - RAPL_CPU(INTEL_FAM6_SKYLAKE_DESKTOP, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_SKYLAKE_MOBILE, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_SKYLAKE_X, rapl_defaults_hsw_server), - RAPL_CPU(INTEL_FAM6_KABYLAKE_MOBILE, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_KABYLAKE_DESKTOP, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_CANNONLAKE_MOBILE, rapl_defaults_core), - - RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT1, rapl_defaults_byt), - RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT, rapl_defaults_cht), - RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD, rapl_defaults_tng), - RAPL_CPU(INTEL_FAM6_ATOM_MOOREFIELD, rapl_defaults_ann), - RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_ATOM_DENVERTON, rapl_defaults_core), - - RAPL_CPU(INTEL_FAM6_XEON_PHI_KNL, rapl_defaults_hsw_server), - RAPL_CPU(INTEL_FAM6_XEON_PHI_KNM, rapl_defaults_hsw_server), + INTEL_CPU_FAM6(SANDYBRIDGE, rapl_defaults_core), + INTEL_CPU_FAM6(SANDYBRIDGE_X, rapl_defaults_core), + + INTEL_CPU_FAM6(IVYBRIDGE, rapl_defaults_core), + INTEL_CPU_FAM6(IVYBRIDGE_X, rapl_defaults_core), + + INTEL_CPU_FAM6(HASWELL_CORE, rapl_defaults_core), + INTEL_CPU_FAM6(HASWELL_ULT, rapl_defaults_core), + INTEL_CPU_FAM6(HASWELL_GT3E, rapl_defaults_core), + INTEL_CPU_FAM6(HASWELL_X, rapl_defaults_hsw_server), + + INTEL_CPU_FAM6(BROADWELL_CORE, rapl_defaults_core), + INTEL_CPU_FAM6(BROADWELL_GT3E, rapl_defaults_core), + INTEL_CPU_FAM6(BROADWELL_XEON_D, rapl_defaults_core), + INTEL_CPU_FAM6(BROADWELL_X, rapl_defaults_hsw_server), + + INTEL_CPU_FAM6(SKYLAKE_DESKTOP, rapl_defaults_core), + INTEL_CPU_FAM6(SKYLAKE_MOBILE, rapl_defaults_core), + INTEL_CPU_FAM6(SKYLAKE_X, rapl_defaults_hsw_server), + INTEL_CPU_FAM6(KABYLAKE_MOBILE, rapl_defaults_core), + INTEL_CPU_FAM6(KABYLAKE_DESKTOP, rapl_defaults_core), + INTEL_CPU_FAM6(CANNONLAKE_MOBILE, rapl_defaults_core), + + INTEL_CPU_FAM6(ATOM_SILVERMONT1, rapl_defaults_byt), + INTEL_CPU_FAM6(ATOM_AIRMONT, rapl_defaults_cht), + INTEL_CPU_FAM6(ATOM_MERRIFIELD, rapl_defaults_tng), + INTEL_CPU_FAM6(ATOM_MOOREFIELD, rapl_defaults_ann), + INTEL_CPU_FAM6(ATOM_GOLDMONT, rapl_defaults_core), + INTEL_CPU_FAM6(ATOM_GEMINI_LAKE, rapl_defaults_core), + INTEL_CPU_FAM6(ATOM_DENVERTON, rapl_defaults_core), + + INTEL_CPU_FAM6(XEON_PHI_KNL, rapl_defaults_hsw_server), + INTEL_CPU_FAM6(XEON_PHI_KNM, rapl_defaults_hsw_server), {} }; MODULE_DEVICE_TABLE(x86cpu, rapl_ids); diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 8e0b8250a13997..cf59e6210d271c 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -104,6 +104,7 @@ enum cppc_regs { * today. */ struct cppc_perf_caps { + u32 guaranteed_perf; u32 highest_perf; u32 nominal_perf; u32 lowest_perf; diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 4325d6fdde9b64..faed7a8977e85b 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -81,6 +81,7 @@ struct cpuidle_device { unsigned int registered:1; unsigned int enabled:1; unsigned int use_deepest_state:1; + unsigned int poll_time_limit:1; unsigned int cpu; int last_residency; @@ -99,16 +100,6 @@ struct cpuidle_device { DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices); DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev); -/** - * cpuidle_get_last_residency - retrieves the last state's residency time - * @dev: the target CPU - */ -static inline int cpuidle_get_last_residency(struct cpuidle_device *dev) -{ - return dev->last_residency; -} - - /**************************** * CPUIDLE DRIVER INTERFACE * ****************************/ diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 3aae5b3af87cf0..e4963b0f45da9b 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -198,6 +198,14 @@ extern void devm_devfreq_remove_device(struct device *dev, extern int devfreq_suspend_device(struct devfreq *devfreq); extern int devfreq_resume_device(struct devfreq *devfreq); +/** + * update_devfreq() - Reevaluate the device and configure frequency + * @devfreq: the devfreq device + * + * Note: devfreq->lock must be held + */ +extern int update_devfreq(struct devfreq *devfreq); + /* Helper functions for devfreq user device driver with OPP. */ extern struct dev_pm_opp *devfreq_recommended_opp(struct device *dev, unsigned long *freq, u32 flags); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 776c546d581afb..3b5d7280e52e19 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -17,11 +17,36 @@ #include #include -/* Defines used for the flags field in the struct generic_pm_domain */ -#define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ -#define GENPD_FLAG_IRQ_SAFE (1U << 1) /* PM domain operates in atomic */ -#define GENPD_FLAG_ALWAYS_ON (1U << 2) /* PM domain is always powered on */ -#define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) /* Keep devices active if wakeup */ +/* + * Flags to control the behaviour of a genpd. + * + * These flags may be set in the struct generic_pm_domain's flags field by a + * genpd backend driver. The flags must be set before it calls pm_genpd_init(), + * which initializes a genpd. + * + * GENPD_FLAG_PM_CLK: Instructs genpd to use the PM clk framework, + * while powering on/off attached devices. + * + * GENPD_FLAG_IRQ_SAFE: This informs genpd that its backend callbacks, + * ->power_on|off(), doesn't sleep. Hence, these + * can be invoked from within atomic context, which + * enables genpd to power on/off the PM domain, + * even when pm_runtime_is_irq_safe() returns true, + * for any of its attached devices. Note that, a + * genpd having this flag set, requires its + * masterdomains to also have it set. + * + * GENPD_FLAG_ALWAYS_ON: Instructs genpd to always keep the PM domain + * powered on. + * + * GENPD_FLAG_ACTIVE_WAKEUP: Instructs genpd to keep the PM domain powered + * on, in case any of its attached devices is used + * in the wakeup path to serve system wakeups. + */ +#define GENPD_FLAG_PM_CLK (1U << 0) +#define GENPD_FLAG_IRQ_SAFE (1U << 1) +#define GENPD_FLAG_ALWAYS_ON (1U << 2) +#define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 099b31960dec24..5d399eeef17277 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -79,6 +79,7 @@ struct dev_pm_set_opp_data { #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); +struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev, int index); void dev_pm_opp_put_opp_table(struct opp_table *opp_table); unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); @@ -136,6 +137,11 @@ static inline struct opp_table *dev_pm_opp_get_opp_table(struct device *dev) return ERR_PTR(-ENOTSUPP); } +static inline struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev, int index) +{ + return ERR_PTR(-ENOTSUPP); +} + static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {} static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) diff --git a/kernel/power/process.c b/kernel/power/process.c index 7381d49a44db5a..4b6a54da7e65bd 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -96,7 +96,7 @@ static int try_to_freeze_tasks(bool user_only) if (wq_busy) show_workqueue_state(); - if (!wakeup) { + if (!wakeup || pm_debug_messages_on) { read_lock(&tasklist_lock); for_each_process_thread(g, p) { if (p != current && !freezer_should_skip(p) diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c index 9ba8a44ad2a714..84caee38418f03 100644 --- a/tools/power/cpupower/bench/parse.c +++ b/tools/power/cpupower/bench/parse.c @@ -145,7 +145,7 @@ struct config *prepare_default_config() config->cpu = 0; config->prio = SCHED_HIGH; config->verbose = 0; - strncpy(config->governor, "ondemand", 8); + strncpy(config->governor, "ondemand", sizeof(config->governor)); config->output = stdout; diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index df43cd45d81044..ccd08dd009962d 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -200,6 +200,8 @@ static int get_boost_mode(unsigned int cpu) printf(_(" Boost States: %d\n"), b_states); printf(_(" Total States: %d\n"), pstate_no); for (i = 0; i < pstate_no; i++) { + if (!pstates[i]) + continue; if (i < b_states) printf(_(" Pstate-Pb%d: %luMHz (boost state)" "\n"), i, pstates[i]); diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index bb41cdd0df6bfe..9607ada5b29a6a 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -33,7 +33,7 @@ union msr_pstate { unsigned vid:8; unsigned iddval:8; unsigned idddiv:2; - unsigned res1:30; + unsigned res1:31; unsigned en:1; } fam17h_bits; unsigned long long val; @@ -119,6 +119,11 @@ int decode_pstates(unsigned int cpu, unsigned int cpu_family, } if (read_msr(cpu, MSR_AMD_PSTATE + i, &pstate.val)) return -1; + if ((cpu_family == 0x17) && (!pstate.fam17h_bits.en)) + continue; + else if (!pstate.bits.en) + continue; + pstates[i] = get_cof(cpu_family, pstate); } *no = i; diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile index c1899cd72c80eb..845541544570a3 100644 --- a/tools/power/pm-graph/Makefile +++ b/tools/power/pm-graph/Makefile @@ -23,8 +23,8 @@ install : uninstall install -m 644 config/suspend-x2-proc.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config install -d $(DESTDIR)$(PREFIX)/bin - ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph - ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph + ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph + ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph install -d $(DESTDIR)$(PREFIX)/share/man/man8 install bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8 diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py index 8ee626c0f6a5d8..6dae5704153746 100755 --- a/tools/power/pm-graph/bootgraph.py +++ b/tools/power/pm-graph/bootgraph.py @@ -34,6 +34,10 @@ from subprocess import call, Popen, PIPE import sleepgraph as aslib +def pprint(msg): + print(msg) + sys.stdout.flush() + # ----------------- CLASSES -------------------- # Class: SystemValues @@ -157,11 +161,11 @@ def cronjobCmdString(self): return cmdline def manualRebootRequired(self): cmdline = self.kernelParams() - print 'To generate a new timeline manually, follow these steps:\n' - print '1. Add the CMDLINE string to your kernel command line.' - print '2. Reboot the system.' - print '3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n' - print 'CMDLINE="%s"' % cmdline + pprint('To generate a new timeline manually, follow these steps:\n\n'\ + '1. Add the CMDLINE string to your kernel command line.\n'\ + '2. Reboot the system.\n'\ + '3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n\n'\ + 'CMDLINE="%s"' % cmdline) sys.exit() def blGrub(self): blcmd = '' @@ -431,7 +435,7 @@ def parseTraceLog(data): if len(cg.list) < 1 or cg.invalid or (cg.end - cg.start == 0): continue if(not cg.postProcess()): - print('Sanity check failed for %s-%d' % (proc, pid)) + pprint('Sanity check failed for %s-%d' % (proc, pid)) continue # match cg data to devices devname = data.deviceMatch(pid, cg) @@ -442,8 +446,8 @@ def parseTraceLog(data): sysvals.vprint('%s callgraph found for %s %s-%d [%f - %f]' %\ (kind, cg.name, proc, pid, cg.start, cg.end)) elif len(cg.list) > 1000000: - print 'WARNING: the callgraph found for %s is massive! (%d lines)' %\ - (devname, len(cg.list)) + pprint('WARNING: the callgraph found for %s is massive! (%d lines)' %\ + (devname, len(cg.list))) # Function: retrieveLogs # Description: @@ -528,7 +532,7 @@ def createBootGraph(data): tMax = data.end tTotal = tMax - t0 if(tTotal == 0): - print('ERROR: No timeline data') + pprint('ERROR: No timeline data') return False user_mode = '%.0f'%(data.tUserMode*1000) last_init = '%.0f'%(tTotal*1000) @@ -734,7 +738,7 @@ def updateCron(restore=False): op.close() res = call([cmd, cronfile]) except Exception, e: - print 'Exception: %s' % str(e) + pprint('Exception: %s' % str(e)) shutil.move(backfile, cronfile) res = -1 if res != 0: @@ -750,7 +754,7 @@ def updateGrub(restore=False): call(sysvals.blexec, stderr=PIPE, stdout=PIPE, env={'PATH': '.:/sbin:/usr/sbin:/usr/bin:/sbin:/bin'}) except Exception, e: - print 'Exception: %s\n' % str(e) + pprint('Exception: %s\n' % str(e)) return # extract the option and create a grub config without it sysvals.rootUser(True) @@ -797,7 +801,7 @@ def updateGrub(restore=False): res = call(sysvals.blexec) os.remove(grubfile) except Exception, e: - print 'Exception: %s' % str(e) + pprint('Exception: %s' % str(e)) res = -1 # cleanup shutil.move(tempfile, grubfile) @@ -821,7 +825,7 @@ def updateKernelParams(restore=False): def doError(msg, help=False): if help == True: printHelp() - print 'ERROR: %s\n' % msg + pprint('ERROR: %s\n' % msg) sysvals.outputResult({'error':msg}) sys.exit() @@ -829,52 +833,51 @@ def doError(msg, help=False): # Description: # print out the help text def printHelp(): - print('') - print('%s v%s' % (sysvals.title, sysvals.version)) - print('Usage: bootgraph ') - print('') - print('Description:') - print(' This tool reads in a dmesg log of linux kernel boot and') - print(' creates an html representation of the boot timeline up to') - print(' the start of the init process.') - print('') - print(' If no specific command is given the tool reads the current dmesg') - print(' and/or ftrace log and creates a timeline') - print('') - print(' Generates output files in subdirectory: boot-yymmdd-HHMMSS') - print(' HTML output: _boot.html') - print(' raw dmesg output: _boot_dmesg.txt') - print(' raw ftrace output: _boot_ftrace.txt') - print('') - print('Options:') - print(' -h Print this help text') - print(' -v Print the current tool version') - print(' -verbose Print extra information during execution and analysis') - print(' -addlogs Add the dmesg log to the html output') - print(' -result fn Export a results table to a text file for parsing.') - print(' -o name Overrides the output subdirectory name when running a new test') - print(' default: boot-{date}-{time}') - print(' [advanced]') - print(' -fstat Use ftrace to add function detail and statistics (default: disabled)') - print(' -f/-callgraph Add callgraph detail, can be very large (default: disabled)') - print(' -maxdepth N limit the callgraph data to N call levels (default: 2)') - print(' -mincg ms Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)') - print(' -timeprec N Number of significant digits in timestamps (0:S, 3:ms, [6:us])') - print(' -expandcg pre-expand the callgraph data in the html output (default: disabled)') - print(' -func list Limit ftrace to comma-delimited list of functions (default: do_one_initcall)') - print(' -cgfilter S Filter the callgraph output in the timeline') - print(' -cgskip file Callgraph functions to skip, off to disable (default: cgskip.txt)') - print(' -bl name Use the following boot loader for kernel params (default: grub)') - print(' -reboot Reboot the machine automatically and generate a new timeline') - print(' -manual Show the steps to generate a new timeline manually (used with -reboot)') - print('') - print('Other commands:') - print(' -flistall Print all functions capable of being captured in ftrace') - print(' -sysinfo Print out system info extracted from BIOS') - print(' [redo]') - print(' -dmesg file Create HTML output using dmesg input (used with -ftrace)') - print(' -ftrace file Create HTML output using ftrace input (used with -dmesg)') - print('') + pprint('\n%s v%s\n'\ + 'Usage: bootgraph \n'\ + '\n'\ + 'Description:\n'\ + ' This tool reads in a dmesg log of linux kernel boot and\n'\ + ' creates an html representation of the boot timeline up to\n'\ + ' the start of the init process.\n'\ + '\n'\ + ' If no specific command is given the tool reads the current dmesg\n'\ + ' and/or ftrace log and creates a timeline\n'\ + '\n'\ + ' Generates output files in subdirectory: boot-yymmdd-HHMMSS\n'\ + ' HTML output: _boot.html\n'\ + ' raw dmesg output: _boot_dmesg.txt\n'\ + ' raw ftrace output: _boot_ftrace.txt\n'\ + '\n'\ + 'Options:\n'\ + ' -h Print this help text\n'\ + ' -v Print the current tool version\n'\ + ' -verbose Print extra information during execution and analysis\n'\ + ' -addlogs Add the dmesg log to the html output\n'\ + ' -result fn Export a results table to a text file for parsing.\n'\ + ' -o name Overrides the output subdirectory name when running a new test\n'\ + ' default: boot-{date}-{time}\n'\ + ' [advanced]\n'\ + ' -fstat Use ftrace to add function detail and statistics (default: disabled)\n'\ + ' -f/-callgraph Add callgraph detail, can be very large (default: disabled)\n'\ + ' -maxdepth N limit the callgraph data to N call levels (default: 2)\n'\ + ' -mincg ms Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)\n'\ + ' -timeprec N Number of significant digits in timestamps (0:S, 3:ms, [6:us])\n'\ + ' -expandcg pre-expand the callgraph data in the html output (default: disabled)\n'\ + ' -func list Limit ftrace to comma-delimited list of functions (default: do_one_initcall)\n'\ + ' -cgfilter S Filter the callgraph output in the timeline\n'\ + ' -cgskip file Callgraph functions to skip, off to disable (default: cgskip.txt)\n'\ + ' -bl name Use the following boot loader for kernel params (default: grub)\n'\ + ' -reboot Reboot the machine automatically and generate a new timeline\n'\ + ' -manual Show the steps to generate a new timeline manually (used with -reboot)\n'\ + '\n'\ + 'Other commands:\n'\ + ' -flistall Print all functions capable of being captured in ftrace\n'\ + ' -sysinfo Print out system info extracted from BIOS\n'\ + ' [redo]\n'\ + ' -dmesg file Create HTML output using dmesg input (used with -ftrace)\n'\ + ' -ftrace file Create HTML output using ftrace input (used with -dmesg)\n'\ + '' % (sysvals.title, sysvals.version)) return True # ----------------- MAIN -------------------- @@ -895,7 +898,7 @@ def printHelp(): printHelp() sys.exit() elif(arg == '-v'): - print("Version %s" % sysvals.version) + pprint("Version %s" % sysvals.version) sys.exit() elif(arg == '-verbose'): sysvals.verbose = True @@ -1016,7 +1019,7 @@ def printHelp(): print f elif cmd == 'checkbl': sysvals.getBootLoader() - print 'Boot Loader: %s\n%s' % (sysvals.bootloader, sysvals.blexec) + pprint('Boot Loader: %s\n%s' % (sysvals.bootloader, sysvals.blexec)) elif(cmd == 'sysinfo'): sysvals.printSystemInfo(True) sys.exit() diff --git a/tools/power/pm-graph/config/cgskip.txt b/tools/power/pm-graph/config/cgskip.txt index e48d588fbfb4ff..9ff88e7e230062 100644 --- a/tools/power/pm-graph/config/cgskip.txt +++ b/tools/power/pm-graph/config/cgskip.txt @@ -27,6 +27,7 @@ ktime_get # console calls printk dev_printk +__dev_printk console_unlock # memory handling diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg index f8fcb06fd68b3b..4f80ad7d72755b 100644 --- a/tools/power/pm-graph/config/custom-timeline-functions.cfg +++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg @@ -105,7 +105,7 @@ override-dev-timeline-functions: true # example: [color=#CC00CC] # # arglist: A list of arguments from registers/stack addresses. See URL: -# https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst +# https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt # # example: cpu=%di:s32 # @@ -170,7 +170,7 @@ pm_restore_console: # example: [color=#CC00CC] # # arglist: A list of arguments from registers/stack addresses. See URL: -# https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst +# https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt # # example: port=+36(%di):s32 # diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8 index 070be2cf7f7439..24a2e7d0ae630f 100644 --- a/tools/power/pm-graph/sleepgraph.8 +++ b/tools/power/pm-graph/sleepgraph.8 @@ -65,9 +65,9 @@ During test, enable/disable runtime suspend for all devices. The test is delayed by 5 seconds to allow runtime suspend changes to occur. The settings are restored after the test is complete. .TP -\fB-display \fIon/off\fR -Turn the display on or off for the test using the xset command. This helps -maintain the consistecy of test data for better comparison. +\fB-display \fIon/off/standby/suspend\fR +Switch the display to the requested mode for the test using the xset command. +This helps maintain the consistency of test data for better comparison. .TP \fB-skiphtml\fR Run the test and capture the trace logs, but skip the timeline generation. @@ -183,6 +183,13 @@ Print out the contents of the ACPI Firmware Performance Data Table. \fB-battery\fR Print out battery status and current charge. .TP +\fB-xon/-xoff/-xstandby/-xsuspend\fR +Test xset by attempting to switch the display to the given mode. This +is the same command which will be issued by \fB-display \fImode\fR. +.TP +\fB-xstat\fR +Get the current DPMS display mode. +.TP \fB-sysinfo\fR Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode. .TP diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 0c760478f7d7dd..52618f3444d4cf 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -54,6 +54,7 @@ import string import re import platform +import signal from datetime import datetime import struct import ConfigParser @@ -61,6 +62,10 @@ from threading import Thread from subprocess import call, Popen, PIPE +def pprint(msg): + print(msg) + sys.stdout.flush() + # ----------------- CLASSES -------------------- # Class: SystemValues @@ -69,10 +74,10 @@ # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.1' + version = '5.2' ansi = False rs = 0 - display = 0 + display = '' gzip = False sync = False verbose = False @@ -99,6 +104,7 @@ class SystemValues: tpath = '/sys/kernel/debug/tracing/' fpdtpath = '/sys/firmware/acpi/tables/FPDT' epath = '/sys/kernel/debug/tracing/events/power/' + pmdpath = '/sys/power/pm_debug_messages' traceevents = [ 'suspend_resume', 'device_pm_callback_end', @@ -109,8 +115,10 @@ class SystemValues: mempath = '/dev/mem' powerfile = '/sys/power/state' mempowerfile = '/sys/power/mem_sleep' + diskpowerfile = '/sys/power/disk' suspendmode = 'mem' memmode = '' + diskmode = '' hostname = 'localhost' prefix = 'test' teststamp = '' @@ -137,16 +145,15 @@ class SystemValues: useprocmon = False notestrun = False cgdump = False + devdump = False mixedphaseheight = True devprops = dict() predelay = 0 postdelay = 0 - procexecfmt = 'ps - (?P.*)$' - devpropfmt = '# Device Properties: .*' - tracertypefmt = '# tracer: (?P.*)' - firmwarefmt = '# fwsuspend (?P[0-9]*) fwresume (?P[0-9]*)$' + pmdebug = '' tracefuncs = { 'sys_sync': {}, + 'ksys_sync': {}, '__pm_notifier_call_chain': {}, 'pm_prepare_console': {}, 'pm_notifier_call_chain': {}, @@ -187,7 +194,6 @@ class SystemValues: dev_tracefuncs = { # general wait/delay/sleep 'msleep': { 'args_x86_64': {'time':'%di:s32'}, 'ub': 1 }, - 'schedule_timeout_uninterruptible': { 'args_x86_64': {'timeout':'%di:s32'}, 'ub': 1 }, 'schedule_timeout': { 'args_x86_64': {'timeout':'%di:s32'}, 'ub': 1 }, 'udelay': { 'func':'__const_udelay', 'args_x86_64': {'loops':'%di:s32'}, 'ub': 1 }, 'usleep_range': { 'args_x86_64': {'min':'%di:s32', 'max':'%si:s32'}, 'ub': 1 }, @@ -199,6 +205,9 @@ class SystemValues: # filesystem 'ext4_sync_fs': {}, # 80211 + 'ath10k_bmi_read_memory': { 'args_x86_64': {'length':'%cx:s32'} }, + 'ath10k_bmi_write_memory': { 'args_x86_64': {'length':'%cx:s32'} }, + 'ath10k_bmi_fast_download': { 'args_x86_64': {'length':'%cx:s32'} }, 'iwlagn_mac_start': {}, 'iwlagn_alloc_bcast_station': {}, 'iwl_trans_pcie_start_hw': {}, @@ -241,6 +250,7 @@ class SystemValues: timeformat = '%.3f' cmdline = '%s %s' % \ (os.path.basename(sys.argv[0]), ' '.join(sys.argv[1:])) + sudouser = '' def __init__(self): self.archargs = 'args_'+platform.machine() self.hostname = platform.node() @@ -256,27 +266,49 @@ def __init__(self): if (hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()): self.ansi = True self.testdir = datetime.now().strftime('suspend-%y%m%d-%H%M%S') + if os.getuid() == 0 and 'SUDO_USER' in os.environ and \ + os.environ['SUDO_USER']: + self.sudouser = os.environ['SUDO_USER'] def vprint(self, msg): self.logmsg += msg+'\n' - if(self.verbose): - print(msg) + if self.verbose or msg.startswith('WARNING:'): + pprint(msg) + def signalHandler(self, signum, frame): + if not self.result: + return + signame = self.signames[signum] if signum in self.signames else 'UNKNOWN' + msg = 'Signal %s caused a tool exit, line %d' % (signame, frame.f_lineno) + sysvals.outputResult({'error':msg}) + sys.exit(3) + def signalHandlerInit(self): + capture = ['BUS', 'SYS', 'XCPU', 'XFSZ', 'PWR', 'HUP', 'INT', 'QUIT', + 'ILL', 'ABRT', 'FPE', 'SEGV', 'TERM', 'TSTP'] + self.signames = dict() + for i in capture: + s = 'SIG'+i + try: + signum = getattr(signal, s) + signal.signal(signum, self.signalHandler) + except: + continue + self.signames[signum] = s def rootCheck(self, fatal=True): if(os.access(self.powerfile, os.W_OK)): return True if fatal: msg = 'This command requires sysfs mount and root access' - print('ERROR: %s\n') % msg + pprint('ERROR: %s\n' % msg) self.outputResult({'error':msg}) - sys.exit() + sys.exit(1) return False def rootUser(self, fatal=False): if 'USER' in os.environ and os.environ['USER'] == 'root': return True if fatal: msg = 'This command must be run as root' - print('ERROR: %s\n') % msg + pprint('ERROR: %s\n' % msg) self.outputResult({'error':msg}) - sys.exit() + sys.exit(1) return False def getExec(self, cmd): dirlist = ['/sbin', '/bin', '/usr/sbin', '/usr/bin', @@ -406,8 +438,8 @@ def initdmesg(self): ktime = m.group('ktime') fp.close() self.dmesgstart = float(ktime) - def getdmesg(self, fwdata=[]): - op = self.writeDatafileHeader(sysvals.dmesgfile, fwdata) + def getdmesg(self, testdata): + op = self.writeDatafileHeader(sysvals.dmesgfile, testdata) # store all new dmesg lines since initdmesg was called fp = Popen('dmesg', stdout=PIPE).stdout for line in fp: @@ -535,7 +567,7 @@ def addKprobes(self, output=False): if len(self.kprobes) < 1: return if output: - print(' kprobe functions in this kernel:') + pprint(' kprobe functions in this kernel:') # first test each kprobe rejects = [] # sort kprobes: trace, ub-dev, custom, dev @@ -557,7 +589,7 @@ def addKprobes(self, output=False): else: kpl[2].append(name) if output: - print(' %s: %s' % (name, res)) + pprint(' %s: %s' % (name, res)) kplist = kpl[0] + kpl[1] + kpl[2] + kpl[3] # remove all failed ones from the list for name in rejects: @@ -571,7 +603,7 @@ def addKprobes(self, output=False): if output: check = self.fgetVal('kprobe_events') linesack = (len(check.split('\n')) - 1) / 2 - print(' kprobe functions enabled: %d/%d' % (linesack, linesout)) + pprint(' kprobe functions enabled: %d/%d' % (linesack, linesout)) self.fsetVal('1', 'events/kprobes/enable') def testKprobe(self, kname, kprobe): self.fsetVal('0', 'events/kprobes/enable') @@ -619,6 +651,8 @@ def cleanupFtrace(self): self.fsetVal('0', 'events/kprobes/enable') self.fsetVal('', 'kprobe_events') self.fsetVal('1024', 'buffer_size_kb') + if self.pmdebug: + self.setVal(self.pmdebug, self.pmdpath) def setupAllKprobes(self): for name in self.tracefuncs: self.defaultKprobe(name, self.tracefuncs[name]) @@ -637,10 +671,15 @@ def isCallgraphFunc(self, name): return False def initFtrace(self): self.printSystemInfo(False) - print('INITIALIZING FTRACE...') + pprint('INITIALIZING FTRACE...') # turn trace off self.fsetVal('0', 'tracing_on') self.cleanupFtrace() + # pm debug messages + pv = self.getVal(self.pmdpath) + if pv != '1': + self.setVal('1', self.pmdpath) + self.pmdebug = pv # set the trace clock to global self.fsetVal('global', 'trace_clock') self.fsetVal('nop', 'current_tracer') @@ -649,7 +688,8 @@ def initFtrace(self): if self.bufsize > 0: tgtsize = self.bufsize elif self.usecallgraph or self.usedevsrc: - tgtsize = min(self.memfree, 3*1024*1024) + bmax = (1*1024*1024) if self.suspendmode == 'disk' else (3*1024*1024) + tgtsize = min(self.memfree, bmax) else: tgtsize = 65536 while not self.fsetVal('%d' % (tgtsize / cpus), 'buffer_size_kb'): @@ -658,7 +698,7 @@ def initFtrace(self): if tgtsize < 65536: tgtsize = int(self.fgetVal('buffer_size_kb')) * cpus break - print 'Setting trace buffers to %d kB (%d kB per cpu)' % (tgtsize, tgtsize/cpus) + pprint('Setting trace buffers to %d kB (%d kB per cpu)' % (tgtsize, tgtsize/cpus)) # initialize the callgraph trace if(self.usecallgraph): # set trace type @@ -691,7 +731,7 @@ def initFtrace(self): if self.usedevsrc: for name in self.dev_tracefuncs: self.defaultKprobe(name, self.dev_tracefuncs[name]) - print('INITIALIZING KPROBES...') + pprint('INITIALIZING KPROBES...') self.addKprobes(self.verbose) if(self.usetraceevents): # turn trace events on @@ -728,19 +768,24 @@ def colorText(self, str, color=31): if not self.ansi: return str return '\x1B[%d;40m%s\x1B[m' % (color, str) - def writeDatafileHeader(self, filename, fwdata=[]): + def writeDatafileHeader(self, filename, testdata): fp = self.openlog(filename, 'w') fp.write('%s\n%s\n# command | %s\n' % (self.teststamp, self.sysstamp, self.cmdline)) - if(self.suspendmode == 'mem' or self.suspendmode == 'command'): - for fw in fwdata: + for test in testdata: + if 'fw' in test: + fw = test['fw'] if(fw): fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1])) + if 'bat' in test: + (a1, c1), (a2, c2) = test['bat'] + fp.write('# battery %s %d %s %d\n' % (a1, c1, a2, c2)) + if test['error'] or len(testdata) > 1: + fp.write('# enter_sleep_error %s\n' % test['error']) return fp - def sudouser(self, dir): - if os.path.exists(dir) and os.getuid() == 0 and \ - 'SUDO_USER' in os.environ: + def sudoUserchown(self, dir): + if os.path.exists(dir) and self.sudouser: cmd = 'chown -R {0}:{0} {1} > /dev/null 2>&1' - call(cmd.format(os.environ['SUDO_USER'], dir), shell=True) + call(cmd.format(self.sudouser, dir), shell=True) def outputResult(self, testdata, num=0): if not self.result: return @@ -762,7 +807,7 @@ def outputResult(self, testdata, num=0): if 'bugurl' in testdata: fp.write('url%s: %s\n' % (n, testdata['bugurl'])) fp.close() - self.sudouser(self.result) + self.sudoUserchown(self.result) def configFile(self, file): dir = os.path.dirname(os.path.realpath(__file__)) if os.path.exists(file): @@ -800,15 +845,16 @@ def openlog(self, filename, mode): # Simple class which holds property values collected # for all the devices used in the timeline. class DevProps: - syspath = '' - altname = '' - async = True - xtraclass = '' - xtrainfo = '' + def __init__(self): + self.syspath = '' + self.altname = '' + self.async = True + self.xtraclass = '' + self.xtrainfo = '' def out(self, dev): return '%s,%s,%d;' % (dev, self.altname, self.async) def debug(self, dev): - print '%s:\n\taltname = %s\n\t async = %s' % (dev, self.altname, self.async) + pprint('%s:\n\taltname = %s\n\t async = %s' % (dev, self.altname, self.async)) def altName(self, dev): if not self.altname or self.altname == dev: return dev @@ -831,9 +877,6 @@ def xtraInfo(self): # A container used to create a device hierachy, with a single root node # and a tree of child nodes. Used by Data.deviceTopology() class DeviceNode: - name = '' - children = 0 - depth = 0 def __init__(self, nodename, nodedepth): self.name = nodename self.children = [] @@ -861,71 +904,78 @@ def __init__(self, nodename, nodedepth): # } # class Data: - dmesg = {} # root data structure - phases = [] # ordered list of phases - start = 0.0 # test start - end = 0.0 # test end - tSuspended = 0.0 # low-level suspend start - tResumed = 0.0 # low-level resume start - tKernSus = 0.0 # kernel level suspend start - tKernRes = 0.0 # kernel level resume end - tLow = 0.0 # time spent in low-level suspend (standby/freeze) - fwValid = False # is firmware data available - fwSuspend = 0 # time spent in firmware suspend - fwResume = 0 # time spent in firmware resume - dmesgtext = [] # dmesg text file in memory - pstl = 0 # process timeline - testnumber = 0 - idstr = '' - html_device_id = 0 - stamp = 0 - outfile = '' - devpids = [] - kerror = False + phasedef = { + 'suspend_prepare': {'order': 0, 'color': '#CCFFCC'}, + 'suspend': {'order': 1, 'color': '#88FF88'}, + 'suspend_late': {'order': 2, 'color': '#00AA00'}, + 'suspend_noirq': {'order': 3, 'color': '#008888'}, + 'suspend_machine': {'order': 4, 'color': '#0000FF'}, + 'resume_machine': {'order': 5, 'color': '#FF0000'}, + 'resume_noirq': {'order': 6, 'color': '#FF9900'}, + 'resume_early': {'order': 7, 'color': '#FFCC00'}, + 'resume': {'order': 8, 'color': '#FFFF88'}, + 'resume_complete': {'order': 9, 'color': '#FFFFCC'}, + } + errlist = { + 'HWERROR' : '.*\[ *Hardware Error *\].*', + 'FWBUG' : '.*\[ *Firmware Bug *\].*', + 'BUG' : '.*BUG.*', + 'ERROR' : '.*ERROR.*', + 'WARNING' : '.*WARNING.*', + 'IRQ' : '.*genirq: .*', + 'TASKFAIL': '.*Freezing of tasks failed.*', + } def __init__(self, num): idchar = 'abcdefghij' - self.pstl = dict() + self.start = 0.0 # test start + self.end = 0.0 # test end + self.tSuspended = 0.0 # low-level suspend start + self.tResumed = 0.0 # low-level resume start + self.tKernSus = 0.0 # kernel level suspend start + self.tKernRes = 0.0 # kernel level resume end + self.fwValid = False # is firmware data available + self.fwSuspend = 0 # time spent in firmware suspend + self.fwResume = 0 # time spent in firmware resume + self.html_device_id = 0 + self.stamp = 0 + self.outfile = '' + self.kerror = False + self.battery = 0 + self.enterfail = '' + self.currphase = '' + self.pstl = dict() # process timeline self.testnumber = num self.idstr = idchar[num] - self.dmesgtext = [] - self.phases = [] - self.dmesg = { # fixed list of 10 phases - 'suspend_prepare': {'list': dict(), 'start': -1.0, 'end': -1.0, - 'row': 0, 'color': '#CCFFCC', 'order': 0}, - 'suspend': {'list': dict(), 'start': -1.0, 'end': -1.0, - 'row': 0, 'color': '#88FF88', 'order': 1}, - 'suspend_late': {'list': dict(), 'start': -1.0, 'end': -1.0, - 'row': 0, 'color': '#00AA00', 'order': 2}, - 'suspend_noirq': {'list': dict(), 'start': -1.0, 'end': -1.0, - 'row': 0, 'color': '#008888', 'order': 3}, - 'suspend_machine': {'list': dict(), 'start': -1.0, 'end': -1.0, - 'row': 0, 'color': '#0000FF', 'order': 4}, - 'resume_machine': {'list': dict(), 'start': -1.0, 'end': -1.0, - 'row': 0, 'color': '#FF0000', 'order': 5}, - 'resume_noirq': {'list': dict(), 'start': -1.0, 'end': -1.0, - 'row': 0, 'color': '#FF9900', 'order': 6}, - 'resume_early': {'list': dict(), 'start': -1.0, 'end': -1.0, - 'row': 0, 'color': '#FFCC00', 'order': 7}, - 'resume': {'list': dict(), 'start': -1.0, 'end': -1.0, - 'row': 0, 'color': '#FFFF88', 'order': 8}, - 'resume_complete': {'list': dict(), 'start': -1.0, 'end': -1.0, - 'row': 0, 'color': '#FFFFCC', 'order': 9} - } - self.phases = self.sortedPhases() + self.dmesgtext = [] # dmesg text file in memory + self.dmesg = dict() # root data structure + self.errorinfo = {'suspend':[],'resume':[]} + self.tLow = [] # time spent in low-level suspends (standby/freeze) + self.devpids = [] + self.devicegroups = 0 + def sortedPhases(self): + return sorted(self.dmesg, key=lambda k:self.dmesg[k]['order']) + def initDevicegroups(self): + # called when phases are all finished being added + for phase in self.dmesg.keys(): + if '*' in phase: + p = phase.split('*') + pnew = '%s%d' % (p[0], len(p)) + self.dmesg[pnew] = self.dmesg.pop(phase) self.devicegroups = [] - for phase in self.phases: + for phase in self.sortedPhases(): self.devicegroups.append([phase]) - self.errorinfo = {'suspend':[],'resume':[]} + def nextPhase(self, phase, offset): + order = self.dmesg[phase]['order'] + offset + for p in self.dmesg: + if self.dmesg[p]['order'] == order: + return p + return '' + def lastPhase(self): + plist = self.sortedPhases() + if len(plist) < 1: + return '' + return plist[-1] def extractErrorInfo(self): - elist = { - 'HWERROR' : '.*\[ *Hardware Error *\].*', - 'FWBUG' : '.*\[ *Firmware Bug *\].*', - 'BUG' : '.*BUG.*', - 'ERROR' : '.*ERROR.*', - 'WARNING' : '.*WARNING.*', - 'IRQ' : '.*genirq: .*', - 'TASKFAIL': '.*Freezing of tasks failed.*', - } lf = sysvals.openlog(sysvals.dmesgfile, 'r') i = 0 list = [] @@ -939,8 +989,8 @@ def extractErrorInfo(self): continue dir = 'suspend' if t < self.tSuspended else 'resume' msg = m.group('msg') - for err in elist: - if re.match(elist[err], msg): + for err in self.errlist: + if re.match(self.errlist[err], msg): list.append((err, dir, t, i, i)) self.kerror = True break @@ -956,7 +1006,7 @@ def setStart(self, time): def setEnd(self, time): self.end = time def isTraceEventOutsideDeviceCalls(self, pid, time): - for phase in self.phases: + for phase in self.sortedPhases(): list = self.dmesg[phase]['list'] for dev in list: d = list[dev] @@ -964,16 +1014,10 @@ def isTraceEventOutsideDeviceCalls(self, pid, time): time < d['end']): return False return True - def phaseCollision(self, phase, isbegin, line): - key = 'end' - if isbegin: - key = 'start' - if self.dmesg[phase][key] >= 0: - sysvals.vprint('IGNORE: %s' % line.strip()) - return True - return False def sourcePhase(self, start): - for phase in self.phases: + for phase in self.sortedPhases(): + if 'machine' in phase: + continue pend = self.dmesg[phase]['end'] if start <= pend: return phase @@ -1004,14 +1048,15 @@ def sourceDevice(self, phaselist, start, end, pid, type): return tgtdev def addDeviceFunctionCall(self, displayname, kprobename, proc, pid, start, end, cdata, rdata): # try to place the call in a device - tgtdev = self.sourceDevice(self.phases, start, end, pid, 'device') + phases = self.sortedPhases() + tgtdev = self.sourceDevice(phases, start, end, pid, 'device') # calls with device pids that occur outside device bounds are dropped # TODO: include these somehow if not tgtdev and pid in self.devpids: return False # try to place the call in a thread if not tgtdev: - tgtdev = self.sourceDevice(self.phases, start, end, pid, 'thread') + tgtdev = self.sourceDevice(phases, start, end, pid, 'thread') # create new thread blocks, expand as new calls are found if not tgtdev: if proc == '<...>': @@ -1053,7 +1098,7 @@ def addDeviceFunctionCall(self, displayname, kprobename, proc, pid, start, end, def overflowDevices(self): # get a list of devices that extend beyond the end of this test run devlist = [] - for phase in self.phases: + for phase in self.sortedPhases(): list = self.dmesg[phase]['list'] for devname in list: dev = list[devname] @@ -1064,7 +1109,7 @@ def mergeOverlapDevices(self, devlist): # merge any devices that overlap devlist for dev in devlist: devname = dev['name'] - for phase in self.phases: + for phase in self.sortedPhases(): list = self.dmesg[phase]['list'] if devname not in list: continue @@ -1079,7 +1124,7 @@ def mergeOverlapDevices(self, devlist): del list[devname] def usurpTouchingThread(self, name, dev): # the caller test has priority of this thread, give it to him - for phase in self.phases: + for phase in self.sortedPhases(): list = self.dmesg[phase]['list'] if name in list: tdev = list[name] @@ -1093,7 +1138,7 @@ def usurpTouchingThread(self, name, dev): break def stitchTouchingThreads(self, testlist): # merge any threads between tests that touch - for phase in self.phases: + for phase in self.sortedPhases(): list = self.dmesg[phase]['list'] for devname in list: dev = list[devname] @@ -1103,7 +1148,7 @@ def stitchTouchingThreads(self, testlist): data.usurpTouchingThread(devname, dev) def optimizeDevSrc(self): # merge any src call loops to reduce timeline size - for phase in self.phases: + for phase in self.sortedPhases(): list = self.dmesg[phase]['list'] for dev in list: if 'src' not in list[dev]: @@ -1141,7 +1186,7 @@ def trimTime(self, t0, dT, left): self.tKernSus = self.trimTimeVal(self.tKernSus, t0, dT, left) self.tKernRes = self.trimTimeVal(self.tKernRes, t0, dT, left) self.end = self.trimTimeVal(self.end, t0, dT, left) - for phase in self.phases: + for phase in self.sortedPhases(): p = self.dmesg[phase] p['start'] = self.trimTimeVal(p['start'], t0, dT, left) p['end'] = self.trimTimeVal(p['end'], t0, dT, left) @@ -1150,6 +1195,7 @@ def trimTime(self, t0, dT, left): d = list[name] d['start'] = self.trimTimeVal(d['start'], t0, dT, left) d['end'] = self.trimTimeVal(d['end'], t0, dT, left) + d['length'] = d['end'] - d['start'] if('ftrace' in d): cg = d['ftrace'] cg.start = self.trimTimeVal(cg.start, t0, dT, left) @@ -1166,30 +1212,51 @@ def trimTime(self, t0, dT, left): tm = self.trimTimeVal(tm, t0, dT, left) list.append((type, tm, idx1, idx2)) self.errorinfo[dir] = list - def normalizeTime(self, tZero): + def trimFreezeTime(self, tZero): # trim out any standby or freeze clock time - if(self.tSuspended != self.tResumed): - if(self.tResumed > tZero): - self.trimTime(self.tSuspended, \ - self.tResumed-self.tSuspended, True) - else: - self.trimTime(self.tSuspended, \ - self.tResumed-self.tSuspended, False) + lp = '' + for phase in self.sortedPhases(): + if 'resume_machine' in phase and 'suspend_machine' in lp: + tS, tR = self.dmesg[lp]['end'], self.dmesg[phase]['start'] + tL = tR - tS + if tL > 0: + left = True if tR > tZero else False + self.trimTime(tS, tL, left) + self.tLow.append('%.0f'%(tL*1000)) + lp = phase def getTimeValues(self): - sktime = (self.dmesg['suspend_machine']['end'] - \ - self.tKernSus) * 1000 - rktime = (self.dmesg['resume_complete']['end'] - \ - self.dmesg['resume_machine']['start']) * 1000 + sktime = (self.tSuspended - self.tKernSus) * 1000 + rktime = (self.tKernRes - self.tResumed) * 1000 return (sktime, rktime) - def setPhase(self, phase, ktime, isbegin): + def setPhase(self, phase, ktime, isbegin, order=-1): if(isbegin): + # phase start over current phase + if self.currphase: + if 'resume_machine' not in self.currphase: + sysvals.vprint('WARNING: phase %s failed to end' % self.currphase) + self.dmesg[self.currphase]['end'] = ktime + phases = self.dmesg.keys() + color = self.phasedef[phase]['color'] + count = len(phases) if order < 0 else order + # create unique name for every new phase + while phase in phases: + phase += '*' + self.dmesg[phase] = {'list': dict(), 'start': -1.0, 'end': -1.0, + 'row': 0, 'color': color, 'order': count} self.dmesg[phase]['start'] = ktime + self.currphase = phase else: + # phase end without a start + if phase not in self.currphase: + if self.currphase: + sysvals.vprint('WARNING: %s ended instead of %s, ftrace corruption?' % (phase, self.currphase)) + else: + sysvals.vprint('WARNING: %s ended without a start, ftrace corruption?' % phase) + return phase + phase = self.currphase self.dmesg[phase]['end'] = ktime - def dmesgSortVal(self, phase): - return self.dmesg[phase]['order'] - def sortedPhases(self): - return sorted(self.dmesg, key=self.dmesgSortVal) + self.currphase = '' + return phase def sortedDevices(self, phase): list = self.dmesg[phase]['list'] slist = [] @@ -1208,13 +1275,13 @@ def fixupInitcalls(self, phase): for devname in phaselist: dev = phaselist[devname] if(dev['end'] < 0): - for p in self.phases: + for p in self.sortedPhases(): if self.dmesg[p]['end'] > dev['start']: dev['end'] = self.dmesg[p]['end'] break sysvals.vprint('%s (%s): callback didnt return' % (devname, phase)) def deviceFilter(self, devicefilter): - for phase in self.phases: + for phase in self.sortedPhases(): list = self.dmesg[phase]['list'] rmlist = [] for name in list: @@ -1229,7 +1296,7 @@ def deviceFilter(self, devicefilter): del list[name] def fixupInitcallsThatDidntReturn(self): # if any calls never returned, clip them at system resume end - for phase in self.phases: + for phase in self.sortedPhases(): self.fixupInitcalls(phase) def phaseOverlap(self, phases): rmgroups = [] @@ -1248,17 +1315,18 @@ def phaseOverlap(self, phases): self.devicegroups.append(newgroup) def newActionGlobal(self, name, start, end, pid=-1, color=''): # which phase is this device callback or action in + phases = self.sortedPhases() targetphase = 'none' htmlclass = '' overlap = 0.0 - phases = [] - for phase in self.phases: + myphases = [] + for phase in phases: pstart = self.dmesg[phase]['start'] pend = self.dmesg[phase]['end'] # see if the action overlaps this phase o = max(0, min(end, pend) - max(start, pstart)) if o > 0: - phases.append(phase) + myphases.append(phase) # set the target phase to the one that overlaps most if o > overlap: if overlap > 0 and phase == 'post_resume': @@ -1267,19 +1335,19 @@ def newActionGlobal(self, name, start, end, pid=-1, color=''): overlap = o # if no target phase was found, pin it to the edge if targetphase == 'none': - p0start = self.dmesg[self.phases[0]]['start'] + p0start = self.dmesg[phases[0]]['start'] if start <= p0start: - targetphase = self.phases[0] + targetphase = phases[0] else: - targetphase = self.phases[-1] + targetphase = phases[-1] if pid == -2: htmlclass = ' bg' elif pid == -3: htmlclass = ' ps' - if len(phases) > 1: + if len(myphases) > 1: htmlclass = ' bg' - self.phaseOverlap(phases) - if targetphase in self.phases: + self.phaseOverlap(myphases) + if targetphase in phases: newname = self.newAction(targetphase, name, pid, '', start, end, '', htmlclass, color) return (targetphase, newname) return False @@ -1311,19 +1379,43 @@ def deviceChildren(self, devname, phase): if(list[child]['par'] == devname): devlist.append(child) return devlist + def maxDeviceNameSize(self, phase): + size = 0 + for name in self.dmesg[phase]['list']: + if len(name) > size: + size = len(name) + return size def printDetails(self): sysvals.vprint('Timeline Details:') sysvals.vprint(' test start: %f' % self.start) sysvals.vprint('kernel suspend start: %f' % self.tKernSus) - for phase in self.phases: - dc = len(self.dmesg[phase]['list']) - sysvals.vprint(' %16s: %f - %f (%d devices)' % (phase, \ - self.dmesg[phase]['start'], self.dmesg[phase]['end'], dc)) + tS = tR = False + for phase in self.sortedPhases(): + devlist = self.dmesg[phase]['list'] + dc, ps, pe = len(devlist), self.dmesg[phase]['start'], self.dmesg[phase]['end'] + if not tS and ps >= self.tSuspended: + sysvals.vprint(' machine suspended: %f' % self.tSuspended) + tS = True + if not tR and ps >= self.tResumed: + sysvals.vprint(' machine resumed: %f' % self.tResumed) + tR = True + sysvals.vprint('%20s: %f - %f (%d devices)' % (phase, ps, pe, dc)) + if sysvals.devdump: + sysvals.vprint(''.join('-' for i in range(80))) + maxname = '%d' % self.maxDeviceNameSize(phase) + fmt = '%3d) %'+maxname+'s - %f - %f' + c = 1 + for name in devlist: + s = devlist[name]['start'] + e = devlist[name]['end'] + sysvals.vprint(fmt % (c, name, s, e)) + c += 1 + sysvals.vprint(''.join('-' for i in range(80))) sysvals.vprint(' kernel resume end: %f' % self.tKernRes) sysvals.vprint(' test end: %f' % self.end) def deviceChildrenAllPhases(self, devname): devlist = [] - for phase in self.phases: + for phase in self.sortedPhases(): list = self.deviceChildren(devname, phase) for dev in list: if dev not in devlist: @@ -1344,7 +1436,7 @@ def printTopology(self, node): if node.name: info = '' drv = '' - for phase in self.phases: + for phase in self.sortedPhases(): list = self.dmesg[phase]['list'] if node.name in list: s = list[node.name]['start'] @@ -1478,8 +1570,29 @@ def createProcessUsageEvents(self): c = self.addProcessUsageEvent(ps, tres) if c > 0: sysvals.vprint('%25s (res): %d' % (ps, c)) + def handleEndMarker(self, time): + dm = self.dmesg + self.setEnd(time) + self.initDevicegroups() + # give suspend_prepare an end if needed + if 'suspend_prepare' in dm and dm['suspend_prepare']['end'] < 0: + dm['suspend_prepare']['end'] = time + # assume resume machine ends at next phase start + if 'resume_machine' in dm and dm['resume_machine']['end'] < 0: + np = self.nextPhase('resume_machine', 1) + if np: + dm['resume_machine']['end'] = dm[np]['start'] + # if kernel resume end not found, assume its the end marker + if self.tKernRes == 0.0: + self.tKernRes = time + # if kernel suspend start not found, assume its the end marker + if self.tKernSus == 0.0: + self.tKernSus = time + # set resume complete to end at end marker + if 'resume_complete' in dm: + dm['resume_complete']['end'] = time def debugPrint(self): - for p in self.phases: + for p in self.sortedPhases(): list = self.dmesg[p]['list'] for devname in list: dev = list[devname] @@ -1490,9 +1603,9 @@ def debugPrint(self): # Description: # A container for kprobe function data we want in the dev timeline class DevFunction: - row = 0 - count = 1 def __init__(self, name, args, caller, ret, start, end, u, proc, pid, color): + self.row = 0 + self.count = 1 self.name = name self.args = args self.caller = caller @@ -1546,16 +1659,15 @@ def repeat(self, tgt): # suspend_resume: phase or custom exec block data # device_pm_callback: device callback info class FTraceLine: - time = 0.0 - length = 0.0 - fcall = False - freturn = False - fevent = False - fkprobe = False - depth = 0 - name = '' - type = '' def __init__(self, t, m='', d=''): + self.length = 0.0 + self.fcall = False + self.freturn = False + self.fevent = False + self.fkprobe = False + self.depth = 0 + self.name = '' + self.type = '' self.time = float(t) if not m and not d: return @@ -1633,13 +1745,13 @@ def getDepth(self, str): return len(str)/2 def debugPrint(self, info=''): if self.isLeaf(): - print(' -- %12.6f (depth=%02d): %s(); (%.3f us) %s' % (self.time, \ + pprint(' -- %12.6f (depth=%02d): %s(); (%.3f us) %s' % (self.time, \ self.depth, self.name, self.length*1000000, info)) elif self.freturn: - print(' -- %12.6f (depth=%02d): %s} (%.3f us) %s' % (self.time, \ + pprint(' -- %12.6f (depth=%02d): %s} (%.3f us) %s' % (self.time, \ self.depth, self.name, self.length*1000000, info)) else: - print(' -- %12.6f (depth=%02d): %s() { (%.3f us) %s' % (self.time, \ + pprint(' -- %12.6f (depth=%02d): %s() { (%.3f us) %s' % (self.time, \ self.depth, self.name, self.length*1000000, info)) def startMarker(self): # Is this the starting line of a suspend? @@ -1675,19 +1787,13 @@ def endMarker(self): # Each instance is tied to a single device in a single phase, and is # comprised of an ordered list of FTraceLine objects class FTraceCallGraph: - id = '' - start = -1.0 - end = -1.0 - list = [] - invalid = False - depth = 0 - pid = 0 - name = '' - partial = False vfname = 'missing_function_name' - ignore = False - sv = 0 def __init__(self, pid, sv): + self.id = '' + self.invalid = False + self.name = '' + self.partial = False + self.ignore = False self.start = -1.0 self.end = -1.0 self.list = [] @@ -1786,7 +1892,7 @@ def addLine(self, line): if warning and ('[make leaf]', line) not in info: info.append(('', line)) if warning: - print 'WARNING: ftrace data missing, corrections made:' + pprint('WARNING: ftrace data missing, corrections made:') for i in info: t, obj = i if obj: @@ -1846,10 +1952,10 @@ def invalidate(self, line): id = 'task %s' % (self.pid) window = '(%f - %f)' % (self.start, line.time) if(self.depth < 0): - print('Data misalignment for '+id+\ + pprint('Data misalignment for '+id+\ ' (buffer overflow), ignoring this callback') else: - print('Too much data for '+id+\ + pprint('Too much data for '+id+\ ' '+window+', ignoring this callback') def slice(self, dev): minicg = FTraceCallGraph(dev['pid'], self.sv) @@ -1902,7 +2008,7 @@ def postProcess(self): elif l.isReturn(): if(l.depth not in stack): if self.sv.verbose: - print 'Post Process Error: Depth missing' + pprint('Post Process Error: Depth missing') l.debugPrint() return False # calculate call length from call/return lines @@ -1919,7 +2025,7 @@ def postProcess(self): return True elif(cnt < 0): if self.sv.verbose: - print 'Post Process Error: Depth is less than 0' + pprint('Post Process Error: Depth is less than 0') return False # trace ended before call tree finished return self.repair(cnt) @@ -1943,7 +2049,7 @@ def deviceMatch(self, pid, data): dev['ftrace'] = cg found = devname return found - for p in data.phases: + for p in data.sortedPhases(): if(data.dmesg[p]['start'] <= self.start and self.start <= data.dmesg[p]['end']): list = data.dmesg[p]['list'] @@ -1966,7 +2072,7 @@ def newActionFromFunction(self, data): if fs < data.start or fe > data.end: return phase = '' - for p in data.phases: + for p in data.sortedPhases(): if(data.dmesg[p]['start'] <= self.start and self.start < data.dmesg[p]['end']): phase = p @@ -1978,20 +2084,20 @@ def newActionFromFunction(self, data): phase, myname = out data.dmesg[phase]['list'][myname]['ftrace'] = self def debugPrint(self, info=''): - print('%s pid=%d [%f - %f] %.3f us') % \ + pprint('%s pid=%d [%f - %f] %.3f us' % \ (self.name, self.pid, self.start, self.end, - (self.end - self.start)*1000000) + (self.end - self.start)*1000000)) for l in self.list: if l.isLeaf(): - print('%f (%02d): %s(); (%.3f us)%s' % (l.time, \ + pprint('%f (%02d): %s(); (%.3f us)%s' % (l.time, \ l.depth, l.name, l.length*1000000, info)) elif l.freturn: - print('%f (%02d): %s} (%.3f us)%s' % (l.time, \ + pprint('%f (%02d): %s} (%.3f us)%s' % (l.time, \ l.depth, l.name, l.length*1000000, info)) else: - print('%f (%02d): %s() { (%.3f us)%s' % (l.time, \ + pprint('%f (%02d): %s() { (%.3f us)%s' % (l.time, \ l.depth, l.name, l.length*1000000, info)) - print(' ') + pprint(' ') class DevItem: def __init__(self, test, phase, dev): @@ -2008,23 +2114,20 @@ def isa(self, cls): # A container for a device timeline which calculates # all the html properties to display it correctly class Timeline: - html = '' - height = 0 # total timeline height - scaleH = 20 # timescale (top) row height - rowH = 30 # device row height - bodyH = 0 # body height - rows = 0 # total timeline rows - rowlines = dict() - rowheight = dict() html_tblock = '
\n' html_device = '
{6}
\n' html_phase = '
{5}
\n' html_phaselet = '
\n' html_legend = '
 {2}
\n' def __init__(self, rowheight, scaleheight): - self.rowH = rowheight - self.scaleH = scaleheight self.html = '' + self.height = 0 # total timeline height + self.scaleH = scaleheight # timescale (top) row height + self.rowH = rowheight # device row height + self.bodyH = 0 # body height + self.rows = 0 # total timeline rows + self.rowlines = dict() + self.rowheight = dict() def createHeader(self, sv, stamp): if(not stamp['time']): return @@ -2251,18 +2354,18 @@ def createTimeScale(self, m0, mMax, tTotal, mode): # Description: # A list of values describing the properties of these test runs class TestProps: - stamp = '' - sysinfo = '' - cmdline = '' - kparams = '' - S0i3 = False - fwdata = [] stampfmt = '# [a-z]*-(?P[0-9]{2})(?P[0-9]{2})(?P[0-9]{2})-'+\ '(?P[0-9]{2})(?P[0-9]{2})(?P[0-9]{2})'+\ ' (?P.*) (?P.*) (?P.*)$' + batteryfmt = '^# battery (?P\w*) (?P\d*) (?P\w*) (?P\d*)' + testerrfmt = '^# enter_sleep_error (?P.*)' sysinfofmt = '^# sysinfo .*' cmdlinefmt = '^# command \| (?P.*)' kparamsfmt = '^# kparams \| (?P.*)' + devpropfmt = '# Device Properties: .*' + tracertypefmt = '# tracer: (?P.*)' + firmwarefmt = '# fwsuspend (?P[0-9]*) fwresume (?P[0-9]*)$' + procexecfmt = 'ps - (?P.*)$' ftrace_line_fmt_fg = \ '^ *(?P