From 9be77e11dade414d2fa63750aa5c754fac49d619 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 19 Feb 2021 17:56:48 +0100 Subject: [PATCH 001/302] powerpc/mm: Move the linear_mapping_mutex to the ifdef where it is used The mutex linear_mapping_mutex is defined at the of the file while its only two user are within the CONFIG_MEMORY_HOTPLUG block. A compile without CONFIG_MEMORY_HOTPLUG set fails on PREEMPT_RT because its mutex implementation is smart enough to realize that it is unused. Move the definition of linear_mapping_mutex to ifdef block where it is used. Fixes: 1f73ad3e8d755 ("powerpc/mm: print warning in arch_remove_linear_mapping()") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210219165648.2505482-1-bigeasy@linutronix.de --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 4e8ce6d8523237..7a59a5c9aa5dc9 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -54,7 +54,6 @@ #include -static DEFINE_MUTEX(linear_mapping_mutex); unsigned long long memory_limit; bool init_mem_is_free; @@ -72,6 +71,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, EXPORT_SYMBOL(phys_mem_access_prot); #ifdef CONFIG_MEMORY_HOTPLUG +static DEFINE_MUTEX(linear_mapping_mutex); #ifdef CONFIG_NUMA int memory_add_physaddr_to_nid(u64 start) From 9634afa67bfd933b231405d05dda37ffa169f32c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 23 Feb 2021 10:53:45 +0100 Subject: [PATCH 002/302] powerpc/chrp: Make hydra_init() static Commit 407d418f2fd4c20a ("powerpc/chrp: Move PHB discovery") moved the sole call to hydra_init() to the source file where it is defined, so it can be made static. Signed-off-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210223095345.2139416-1-geert@linux-m68k.org --- arch/powerpc/include/asm/hydra.h | 2 -- arch/powerpc/platforms/chrp/pci.c | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index ae02eb53d6ef82..d024447283a0cf 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -94,8 +94,6 @@ extern volatile struct Hydra __iomem *Hydra; #define HYDRA_INT_EXT7 18 /* Power Off Request */ #define HYDRA_INT_SPARE 19 -extern int hydra_init(void); - #endif /* __KERNEL__ */ #endif /* _ASMPPC_HYDRA_H */ diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c index 8c421dc78b2833..76e6256cb0a788 100644 --- a/arch/powerpc/platforms/chrp/pci.c +++ b/arch/powerpc/platforms/chrp/pci.c @@ -131,8 +131,7 @@ static struct pci_ops rtas_pci_ops = volatile struct Hydra __iomem *Hydra = NULL; -int __init -hydra_init(void) +static int __init hydra_init(void) { struct device_node *np; struct resource r; From 4f46d57cab3b3410411b395a6fa12a07947cb14a Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 24 Feb 2021 15:29:21 +0800 Subject: [PATCH 003/302] powerpc: remove unneeded semicolon Fix the following coccicheck warnings: ./arch/powerpc/kernel/prom_init.c:2986:2-3: Unneeded semicolon. 
Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1614151761-53721-1-git-send-email-jiapeng.chong@linux.alibaba.com --- arch/powerpc/kernel/prom_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index ccf77b985c8f6b..41ed7e33d8973c 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2983,7 +2983,7 @@ static void __init fixup_device_tree_efika_add_phy(void) " 0x3 encode-int encode+" " s\" interrupts\" property" " finish-device"); - }; + } /* Check for a PHY device node - if missing then create one and * give it's phandle to the ethernet node */ From 5c4a4802b9ac8c1acdf2250fad3f8f2d6254f9c7 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Wed, 24 Feb 2021 13:25:47 +0530 Subject: [PATCH 004/302] powerpc: Fix spelling of "droping" to "dropping" in traps.c s/droping/dropping/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210224075547.763063-1-unixbhaskar@gmail.com --- arch/powerpc/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index a44a30b0688ca5..a9b199b46beefc 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -405,7 +405,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs) * Now test if the interrupt has hit a range that may be using * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The * problem ranges all run un-relocated. Test real and virt modes - * at the same time by droping the high bit of the nip (virt mode + * at the same time by dropping the high bit of the nip (virt mode * entry points still have the +0x4000 offset). */ nip &= ~0xc000000000000000ULL; From 3a72c94ebfb1f171eba0715998010678a09ec796 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Tue, 23 Feb 2021 17:02:27 +1000 Subject: [PATCH 005/302] selftests/powerpc: Fix L1D flushing tests for Power10 The rfi_flush and entry_flush selftests work by using the PM_LD_MISS_L1 perf event to count L1D misses. The value of this event has changed over time: - Power7 uses 0x400f0 - Power8 and Power9 use both 0x400f0 and 0x3e054 - Power10 uses only 0x3e054 Rather than relying on raw values, configure perf to count L1D read misses in the most explicit way available. This fixes the selftests to work on systems without 0x400f0 as PM_LD_MISS_L1, and should change no behaviour for systems that the tests already worked on. The only potential downside is that referring to a specific perf event requires PMU support implemented in the kernel for that platform. 
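For reference, the PERF_TYPE_HW_CACHE encoding used below corresponds roughly to setting up a perf_event_attr along these lines (illustrative sketch only, not part of this patch):

	struct perf_event_attr attr = {
		.size   = sizeof(attr),
		.type   = PERF_TYPE_HW_CACHE,
		/* cache-id | (op-id << 8) | (result-id << 16) */
		.config = PERF_COUNT_HW_CACHE_L1D |
			  (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			  (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
	};
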
Signed-off-by: Russell Currey Acked-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210223070227.2916871-1-ruscur@russell.cc --- tools/testing/selftests/powerpc/security/entry_flush.c | 2 +- tools/testing/selftests/powerpc/security/flush_utils.h | 4 ++++ tools/testing/selftests/powerpc/security/rfi_flush.c | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c index 78cf914fa32173..68ce377b205e91 100644 --- a/tools/testing/selftests/powerpc/security/entry_flush.c +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -53,7 +53,7 @@ int entry_flush_test(void) entry_flush = entry_flush_orig; - fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); FAIL_IF(fd < 0); p = (char *)memalign(zero_size, CACHELINE_SIZE); diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h index 07a5eb30146690..7a3d60292916ec 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.h +++ b/tools/testing/selftests/powerpc/security/flush_utils.h @@ -9,6 +9,10 @@ #define CACHELINE_SIZE 128 +#define PERF_L1D_READ_MISS_CONFIG ((PERF_COUNT_HW_CACHE_L1D) | \ + (PERF_COUNT_HW_CACHE_OP_READ << 8) | \ + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)) + void syscall_loop(char *p, unsigned long iterations, unsigned long zero_size); diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 7565fd786640fc..f73484a6470fae 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -54,7 +54,7 @@ int rfi_flush_test(void) rfi_flush = rfi_flush_orig; - fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); FAIL_IF(fd < 0); p = (char *)memalign(zero_size, CACHELINE_SIZE); From fbced1546eaaab57a32e56c974ea8acf10c6abd8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 2 Mar 2021 12:50:14 -0700 Subject: [PATCH 006/302] powerpc/fadump: Mark fadump_calculate_reserve_size as __init If fadump_calculate_reserve_size() is not inlined, there is a modpost warning: WARNING: modpost: vmlinux.o(.text+0x5196c): Section mismatch in reference from the function fadump_calculate_reserve_size() to the function .init.text:parse_crashkernel() The function fadump_calculate_reserve_size() references the function __init parse_crashkernel(). This is often because fadump_calculate_reserve_size lacks a __init annotation or the annotation of parse_crashkernel is wrong. fadump_calculate_reserve_size() calls parse_crashkernel(), which is marked as __init and fadump_calculate_reserve_size() is called from within fadump_reserve_mem(), which is also marked as __init. Mark fadump_calculate_reserve_size() as __init to fix the section mismatch. Additionally, remove the inline keyword as it is not necessary to inline this function; the compiler is still free to do so if it feels it is worthwhile since commit 889b3c1245de ("compiler: remove CONFIG_OPTIMIZE_INLINING entirely"). 
Fixes: 11550dc0a00b ("powerpc/fadump: reuse crashkernel parameter for fadump memory reservation") Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://github.com/ClangBuiltLinux/linux/issues/1300 Link: https://lore.kernel.org/r/20210302195013.2626335-1-nathan@kernel.org --- arch/powerpc/kernel/fadump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 8482739d42f380..eddf362caedce8 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -292,7 +292,7 @@ static void fadump_show_config(void) * that is required for a kernel to boot successfully. * */ -static inline u64 fadump_calculate_reserve_size(void) +static __init u64 fadump_calculate_reserve_size(void) { u64 base, size, bootmem_min; int ret; From 1ef1dd9c7ed27b080445e1576e8a05957e0e4dfc Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 2 Mar 2021 13:08:29 -0700 Subject: [PATCH 007/302] powerpc/prom: Mark identical_pvr_fixup as __init If identical_pvr_fixup() is not inlined, there are two modpost warnings: WARNING: modpost: vmlinux.o(.text+0x54e8): Section mismatch in reference from the function identical_pvr_fixup() to the function .init.text:of_get_flat_dt_prop() The function identical_pvr_fixup() references the function __init of_get_flat_dt_prop(). This is often because identical_pvr_fixup lacks a __init annotation or the annotation of of_get_flat_dt_prop is wrong. WARNING: modpost: vmlinux.o(.text+0x551c): Section mismatch in reference from the function identical_pvr_fixup() to the function .init.text:identify_cpu() The function identical_pvr_fixup() references the function __init identify_cpu(). This is often because identical_pvr_fixup lacks a __init annotation or the annotation of identify_cpu is wrong. identical_pvr_fixup() calls two functions marked as __init and is only called by a function marked as __init so it should be marked as __init as well. At the same time, remove the inline keywork as it is not necessary to inline this function. The compiler is still free to do so if it feels it is worthwhile since commit 889b3c1245de ("compiler: remove CONFIG_OPTIMIZE_INLINING entirely"). Fixes: 14b3d926a22b ("[POWERPC] 4xx: update 440EP(x)/440GR(x) identical PVR issue workaround") Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://github.com/ClangBuiltLinux/linux/issues/1316 Link: https://lore.kernel.org/r/20210302200829.2680663-1-nathan@kernel.org --- arch/powerpc/kernel/prom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9a4797d1d40d54..a8b2d6bfc1ca7e 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -267,7 +267,7 @@ static struct feature_property { }; #if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU) -static inline void identical_pvr_fixup(unsigned long node) +static __init void identical_pvr_fixup(unsigned long node) { unsigned int pvr; const char *model = of_get_flat_dt_prop(node, "model", NULL); From 1a0e4550fb12d51cd76e2b9439f45977473b022f Mon Sep 17 00:00:00 2001 From: Zhang Yunkai Date: Wed, 3 Mar 2021 20:49:43 -0800 Subject: [PATCH 008/302] powerpc: Remove duplicate includes asm/tm.h included in traps.c is duplicated. It is also included on the 62nd line. asm/udbg.h included in setup-common.c is duplicated. It is also included on the 61st line. asm/bug.h included in arch/powerpc/include/asm/book3s/64/mmu-hash.h is duplicated. 
It is also included on the 12th line. asm/tlbflush.h included in arch/powerpc/include/asm/pgtable.h is duplicated. It is also included on the 11th line. asm/page.h included in arch/powerpc/include/asm/thread_info.h is duplicated. It is also included on the 13th line. Signed-off-by: Zhang Yunkai [mpe: Squash together from multiple commits] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 - arch/powerpc/include/asm/pgtable.h | 2 -- arch/powerpc/include/asm/thread_info.h | 1 - arch/powerpc/kernel/setup-common.c | 1 - arch/powerpc/kernel/traps.c | 1 - 5 files changed, 6 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index f911bdb68d8bb5..3004f3323144d1 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -18,7 +18,6 @@ * complete pgtable.h but only a portion of it. */ #include -#include #include #include diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 4eed82172e3348..c6a676714f0409 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -41,8 +41,6 @@ struct mm_struct; #ifndef __ASSEMBLY__ -#include - /* Keep these as a macros to avoid include dependency mess */ #define pte_page(x) pfn_to_page(pte_pfn(x)) #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 386d576673a1df..9d6402402b9bad 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -38,7 +38,6 @@ #ifndef __ASSEMBLY__ #include #include -#include #include #define SLB_PRELOAD_NR 16U diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index bee984b1887b6c..7221f11acf0460 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -69,7 +69,6 @@ #include "setup.h" #ifdef DEBUG -#include #define DBG(fmt...) udbg_printf(fmt) #else #define DBG(fmt...) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index a9b199b46beefc..97914ee2fdc950 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -53,7 +53,6 @@ #ifdef CONFIG_PPC64 #include #include -#include #endif #include #include From 1a029e0edbc5890f76b642222d9899c093212fe6 Mon Sep 17 00:00:00 2001 From: Zhang Yunkai Date: Wed, 3 Mar 2021 19:13:18 -0800 Subject: [PATCH 009/302] powerpc: Fix misspellings in tlbflush.h The comment marking the end of the include guard is wrong, fix it up. 
Signed-off-by: Zhang Yunkai [mpe: Rewrite commit message] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210304031318.188447-1-zhang.yunkai@zte.com.cn --- arch/powerpc/include/asm/book3s/32/tlbflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index d941c06d4f2eaa..ba1743c52b56d4 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -79,4 +79,4 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) flush_tlb_mm(mm); } -#endif /* _ASM_POWERPC_TLBFLUSH_H */ +#endif /* _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H */ From 13b8219bd00d953cad60431cb47db96eb835c71d Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 3 Mar 2021 12:46:03 +0000 Subject: [PATCH 010/302] powerpc/pseries: Move hvc_vio_init_early() prototype to shared location MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/tty/hvc/hvc_vio.c:385:13: warning: no previous prototype for ‘hvc_vio_init_early’ 385 | void __init hvc_vio_init_early(void) | ^~~~~~~~~~~~~~~~~~ Signed-off-by: Lee Jones Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210303124603.3150175-1-lee.jones@linaro.org --- arch/powerpc/include/asm/hvconsole.h | 3 +++ arch/powerpc/platforms/pseries/pseries.h | 3 --- arch/powerpc/platforms/pseries/setup.c | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/hvconsole.h b/arch/powerpc/include/asm/hvconsole.h index 999ed5ac905316..ccb2034506f0f1 100644 --- a/arch/powerpc/include/asm/hvconsole.h +++ b/arch/powerpc/include/asm/hvconsole.h @@ -24,5 +24,8 @@ extern int hvc_get_chars(uint32_t vtermno, char *buf, int count); extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count); +/* Provided by HVC VIO */ +void hvc_vio_init_early(void); + #endif /* __KERNEL__ */ #endif /* _PPC64_HVCONSOLE_H */ diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 4fe48c04c6c20a..a13438fca10a89 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -43,9 +43,6 @@ extern void pSeries_final_fixup(void); /* Poweron flag used for enabling auto ups restart */ extern unsigned long rtas_poweron_auto; -/* Provided by HVC VIO */ -extern void hvc_vio_init_early(void); - /* Dynamic logical Partitioning/Mobility */ extern void dlpar_free_cc_nodes(struct device_node *); extern void dlpar_free_cc_property(struct property *); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 46e1540abc2297..145e3f4c999afe 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -71,6 +71,7 @@ #include #include #include +#include #include "pseries.h" #include "../../../../drivers/pci/pci.h" From 0b71b37241784c309bea6bd6a9d2027943c4ab94 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 5 Mar 2021 14:28:07 +0300 Subject: [PATCH 011/302] powerpc/ptrace: Remove duplicate check from pt_regs_check() "offsetof(struct pt_regs, msr) == offsetof(struct user_pt_regs, msr)" checked in pt_regs_check() twice in a row. Remove the second check. 
Signed-off-by: Denis Efremov Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210305112807.26299-1-efremov@linux.com --- arch/powerpc/kernel/ptrace/ptrace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 4f3d4ff3728cb0..51801777906c29 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -354,8 +354,6 @@ void __init pt_regs_check(void) offsetof(struct user_pt_regs, nip)); BUILD_BUG_ON(offsetof(struct pt_regs, msr) != offsetof(struct user_pt_regs, msr)); - BUILD_BUG_ON(offsetof(struct pt_regs, msr) != - offsetof(struct user_pt_regs, msr)); BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != offsetof(struct user_pt_regs, orig_gpr3)); BUILD_BUG_ON(offsetof(struct pt_regs, ctr) != From 90cbac0e995dd92f7bcf82f74aa50250bf194a4a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 4 Mar 2021 14:35:09 +0000 Subject: [PATCH 012/302] powerpc: Enable KFENCE for PPC32 Add architecture specific implementation details for KFENCE and enable KFENCE for the ppc32 architecture. In particular, this implements the required interface in . KFENCE requires that attributes for pages from its memory pool can individually be set. Therefore, force the Read/Write linear map to be mapped at page granularity. Signed-off-by: Christophe Leroy Acked-by: Marco Elver Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8dfe1bd2abde26337c1d8c1ad0acfcc82185e0d5.1614868445.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 13 ++++++------ arch/powerpc/include/asm/kfence.h | 33 +++++++++++++++++++++++++++++++ arch/powerpc/mm/book3s32/mmu.c | 2 +- arch/powerpc/mm/fault.c | 7 ++++++- arch/powerpc/mm/init_32.c | 3 +++ arch/powerpc/mm/mmu_decl.h | 5 +++++ arch/powerpc/mm/nohash/8xx.c | 4 ++-- 7 files changed, 57 insertions(+), 10 deletions(-) create mode 100644 arch/powerpc/include/asm/kfence.h diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 386ae12d8523b4..d46db0bfb99878 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -185,6 +185,7 @@ config PPC select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KGDB + select HAVE_ARCH_KFENCE if PPC32 select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_NVRAM_OPS @@ -786,7 +787,7 @@ config THREAD_SHIFT config DATA_SHIFT_BOOL bool "Set custom data alignment" depends on ADVANCED_OPTIONS - depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC + depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) help This option allows you to set the kernel data alignment. 
When @@ -798,13 +799,13 @@ config DATA_SHIFT_BOOL config DATA_SHIFT int "Data shift" if DATA_SHIFT_BOOL default 24 if STRICT_KERNEL_RWX && PPC64 - range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32 - range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx + range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 + range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 - default 18 if DEBUG_PAGEALLOC && PPC_BOOK3S_32 + default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 default 23 if STRICT_KERNEL_RWX && PPC_8xx - default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA - default 19 if DEBUG_PAGEALLOC && PPC_8xx + default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA + default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx default PPC_PAGE_SHIFT help On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO. diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h new file mode 100644 index 00000000000000..a9846b68c6b9ea --- /dev/null +++ b/arch/powerpc/include/asm/kfence.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * powerpc KFENCE support. + * + * Copyright (C) 2020 CS GROUP France + */ + +#ifndef __ASM_POWERPC_KFENCE_H +#define __ASM_POWERPC_KFENCE_H + +#include +#include + +static inline bool arch_kfence_init_pool(void) +{ + return true; +} + +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + pte_t *kpte = virt_to_kpte(addr); + + if (protect) { + pte_update(&init_mm, addr, kpte, _PAGE_PRESENT, 0, 0); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + } else { + pte_update(&init_mm, addr, kpte, 0, _PAGE_PRESENT, 0); + } + + return true; +} + +#endif /* __ASM_POWERPC_KFENCE_H */ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index d7eb266a3f7ad8..a0db398b5c2658 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -162,7 +162,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; - if (debug_pagealloc_enabled() || __map_without_bats) { + if (debug_pagealloc_enabled_or_kfence() || __map_without_bats) { pr_debug_once("Read-Write memory mapped without BATs\n"); if (base >= border) return base; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index bb368257b55cb4..bea13682c9092a 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -418,8 +419,12 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, * take a page fault to a kernel address or a page fault to a user * address outside of dedicated places */ - if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) + if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) { + if (kfence_handle_page_fault(address, is_write, regs)) + return 0; + return SIGSEGV; + } /* * If we're in an interrupt, have no user context or are running diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 02c7db4087cbc7..3d690be48e845e 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -97,6 +97,9 @@ static void __init MMU_setup(void) if (IS_ENABLED(CONFIG_PPC_8xx)) return; + if (IS_ENABLED(CONFIG_KFENCE)) + __map_without_ltlbs = 1; + if (debug_pagealloc_enabled()) __map_without_ltlbs 
= 1; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 998810e685620d..7dac910c0b217f 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -185,3 +185,8 @@ void ptdump_check_wx(void); #else static inline void ptdump_check_wx(void) { } #endif + +static inline bool debug_pagealloc_enabled_or_kfence(void) +{ + return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled(); +} diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 19a3eec1d8c52b..71bfdbedacee81 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -149,7 +149,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); unsigned long sinittext = __pa(_sinittext); - bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled(); + bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled_or_kfence(); unsigned long boundary = strict_boundary ? sinittext : etext8; unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); @@ -161,7 +161,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return 0; mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); - if (debug_pagealloc_enabled()) { + if (debug_pagealloc_enabled_or_kfence()) { top = boundary; } else { mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true); From 6ce56e1ac380eaa088d3f4c01446e15e195bd541 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 5 Mar 2021 13:55:54 +0100 Subject: [PATCH 013/302] powerpc/pseries: export LPAR security flavor in lparcfg This is helpful to read the security flavor from inside the LPAR. In /sys/kernel/debug/powerpc/security_features it can be seen if mitigations are on or off but not the level set through the ASMI menu. Furthermore, reporting it through /proc/powerpc/lparcfg allows an easy processing by the lparstat command [1]. 
Export it like this in /proc/powerpc/lparcfg: $ grep security_flavor /proc/powerpc/lparcfg security_flavor=1 Value follows what is documented on the IBM support page [2]: 0 Speculative execution fully enabled 1 Speculative execution controls to mitigate user-to-kernel attacks 2 Speculative execution controls to mitigate user-to-kernel and user-to-user side-channel attacks [1] https://groups.google.com/g/powerpc-utils-devel/c/NaKXvdyl_UI/m/wa2stpIDAQAJ [2] https://www.ibm.com/support/pages/node/715841 Signed-off-by: Laurent Dufour Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210305125554.5165-1-ldufour@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 1 + arch/powerpc/platforms/pseries/lparcfg.c | 2 ++ arch/powerpc/platforms/pseries/pseries.h | 1 + arch/powerpc/platforms/pseries/setup.c | 7 +++++++ 4 files changed, 11 insertions(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index ed6086d57b22e1..455e188da26dbb 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -389,6 +389,7 @@ #define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 #define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 +#define H_CPU_BEHAV_FAVOUR_SECURITY_H (1ull << 60) // IBM bit 3 #define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5 #define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6 diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index e278390ab28d14..f71eac74ea92ab 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -537,6 +537,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) parse_em_data(m); maxmem_data(m); + seq_printf(m, "security_flavor=%u\n", pseries_security_flavor); + return 0; } diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index a13438fca10a89..8925a0fac15f70 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -108,6 +108,7 @@ static inline unsigned long cmo_get_page_size(void) int dlpar_workqueue_init(void); +extern u32 pseries_security_flavor; void pseries_setup_security_mitigations(void); void pseries_lpar_read_hblkrm_characteristics(void); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 145e3f4c999afe..754e493b7c05bb 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -86,6 +86,7 @@ EXPORT_SYMBOL(CMO_PageSize); int fwnmi_active; /* TRUE if an FWNMI handler is present */ int ibm_nmi_interlock_token; +u32 pseries_security_flavor; static void pSeries_show_cpuinfo(struct seq_file *m) { @@ -535,9 +536,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) /* * The features below are enabled by default, so we instead look to see * if firmware has *disabled* them, and clear them if so. + * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if + * H_CPU_BEHAV_FAVOUR_SECURITY is. 
*/ if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); + else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H) + pseries_security_flavor = 1; + else + pseries_security_flavor = 2; if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); From c6adc835c68b713360f918d21372c2f34fc228e2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:57:00 +0000 Subject: [PATCH 014/302] powerpc/uaccess: Also perform 64 bits copies in unsafe_copy_to_user() on ppc32 ppc32 has an efficiant 64 bits __put_user(), so also use it in order to unroll loops more. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ccc08a16eea682d6fa4acc957ffe34003a8f0844.1615398498.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 78e2a3990eab30..2c09cff205efbf 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -494,9 +494,9 @@ do { \ size_t _len = (l); \ int _i; \ \ - for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long)) \ - unsafe_put_user(*(long*)(_src + _i), (long __user *)(_dst + _i), e); \ - if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) { \ + for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64)) \ + unsafe_put_user(*(u64 *)(_src + _i), (u64 __user *)(_dst + _i), e); \ + if (_len & 4) { \ unsafe_put_user(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \ _i += 4; \ } \ From 7472199a6eda6a79f9e3b126f52f67f9ce3e1f77 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:57:01 +0000 Subject: [PATCH 015/302] powerpc/uaccess: Swap clear_user() and __clear_user() It is clear_user() which is expected to call __clear_user(), not the reverse. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d8ec01fb22f33d87321451d5e5f01cb56dacaa39.1615398498.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 2c09cff205efbf..1c1d404514b108 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -414,21 +414,20 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) unsigned long __arch_clear_user(void __user *addr, unsigned long size); -static inline unsigned long clear_user(void __user *addr, unsigned long size) +static inline unsigned long __clear_user(void __user *addr, unsigned long size) { - unsigned long ret = size; + unsigned long ret; + might_fault(); - if (likely(access_ok(addr, size))) { - allow_write_to_user(addr, size); - ret = __arch_clear_user(addr, size); - prevent_write_to_user(addr, size); - } + allow_write_to_user(addr, size); + ret = __arch_clear_user(addr, size); + prevent_write_to_user(addr, size); return ret; } -static inline unsigned long __clear_user(void __user *addr, unsigned long size) +static inline unsigned long clear_user(void __user *addr, unsigned long size) { - return clear_user(addr, size); + return likely(access_ok(addr, size)) ? 
__clear_user(addr, size) : size; } extern long strncpy_from_user(char *dst, const char __user *src, long count); From 4b8cda58812c1e1bf79d37f2ddff3cf03b7025da Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:57:02 +0000 Subject: [PATCH 016/302] powerpc/uaccess: Move copy_mc_xxx() functions down copy_mc_xxx() functions are in the middle of raw_copy functions. For clarity, move them out of the raw_copy functions block. They are using access_ok, so they need to be after the general functions in order to eventually allow the inclusion of asm-generic/uaccess.h in some future. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2cdecb6e5a2fcee6c158d18dd254b71ec0e0da4d.1615398498.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 52 +++++++++++++++--------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 1c1d404514b108..479cb30eabd715 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -351,32 +351,6 @@ do { \ extern unsigned long __copy_tofrom_user(void __user *to, const void __user *from, unsigned long size); -#ifdef CONFIG_ARCH_HAS_COPY_MC -unsigned long __must_check -copy_mc_generic(void *to, const void *from, unsigned long size); - -static inline unsigned long __must_check -copy_mc_to_kernel(void *to, const void *from, unsigned long size) -{ - return copy_mc_generic(to, from, size); -} -#define copy_mc_to_kernel copy_mc_to_kernel - -static inline unsigned long __must_check -copy_mc_to_user(void __user *to, const void *from, unsigned long n) -{ - if (likely(check_copy_size(from, n, true))) { - if (access_ok(to, n)) { - allow_write_to_user(to, n); - n = copy_mc_generic((void *)to, from, n); - prevent_write_to_user(to, n); - } - } - - return n; -} -#endif - #ifdef __powerpc64__ static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) @@ -433,6 +407,32 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size) extern long strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); +#ifdef CONFIG_ARCH_HAS_COPY_MC +unsigned long __must_check +copy_mc_generic(void *to, const void *from, unsigned long size); + +static inline unsigned long __must_check +copy_mc_to_kernel(void *to, const void *from, unsigned long size) +{ + return copy_mc_generic(to, from, size); +} +#define copy_mc_to_kernel copy_mc_to_kernel + +static inline unsigned long __must_check +copy_mc_to_user(void __user *to, const void *from, unsigned long n) +{ + if (likely(check_copy_size(from, n, true))) { + if (access_ok(to, n)) { + allow_write_to_user(to, n); + n = copy_mc_generic((void *)to, from, n); + prevent_write_to_user(to, n); + } + } + + return n; +} +#endif + extern long __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size); extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, From fd69d544b0e785b11699675154bdfe01a04538cd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:57:03 +0000 Subject: [PATCH 017/302] powerpc/syscalls: Use sys_old_select() in ppc_select() Instead of opencodying the copy of parameters, use the generic sys_old_select(). 
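For illustration, the generic sys_old_select() consumes the legacy single-pointer calling convention roughly as follows (a sketch based on the common fs/select.c implementation, not part of this patch):

	struct sel_arg_struct {
		unsigned long n;
		fd_set __user *inp, *outp, *exp;
		struct __kernel_old_timeval __user *tvp;
	};

	/* old_select(arg): copy the argument block, then do a normal select */
	struct sel_arg_struct a;

	if (copy_from_user(&a, arg, sizeof(a)))
		return -EFAULT;
	return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
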
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4de983ad254739da1fe6e9f273baf387b7043ae0.1615398498.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/unistd.h | 1 + arch/powerpc/kernel/syscalls.c | 12 ++---------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 700fcdac2e3cc7..b541c690a31c2c 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -40,6 +40,7 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #ifdef CONFIG_PPC32 #define __ARCH_WANT_OLD_STAT +#define __ARCH_WANT_SYS_OLD_SELECT #endif #ifdef CONFIG_PPC64 #define __ARCH_WANT_SYS_TIME diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 078608ec2e9218..a552c9e68d7e69 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -82,16 +82,8 @@ int ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp) { if ( (unsigned long)n >= 4096 ) - { - unsigned long __user *buffer = (unsigned long __user *)n; - if (!access_ok(buffer, 5*sizeof(unsigned long)) - || __get_user(n, buffer) - || __get_user(inp, ((fd_set __user * __user *)(buffer+1))) - || __get_user(outp, ((fd_set __user * __user *)(buffer+2))) - || __get_user(exp, ((fd_set __user * __user *)(buffer+3))) - || __get_user(tvp, ((struct __kernel_old_timeval __user * __user *)(buffer+4)))) - return -EFAULT; - } + return sys_old_select((void __user *)n); + return sys_select(n, inp, outp, exp, tvp); } #endif From e63ceebdad82f85e48b018abfc6af4ed6958179e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:57:04 +0000 Subject: [PATCH 018/302] powerpc/lib: Don't use __put_user_asm_goto() outside of uaccess.h __put_user_asm_goto() is internal to uaccess.h Use __put_kernel_nofault() instead. The generated code is identical. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3e32c4f0361933909368b68f5ee569e5de661c1b.1615398498.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/code-patching.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 2333625b5e3150..65aec4d6d9ba9c 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -21,10 +21,15 @@ static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr, struct ppc_inst *patch_addr) { - if (!ppc_inst_prefixed(instr)) - __put_user_asm_goto(ppc_inst_val(instr), patch_addr, failed, "stw"); - else - __put_user_asm_goto(ppc_inst_as_u64(instr), patch_addr, failed, "std"); + if (!ppc_inst_prefixed(instr)) { + u32 val = ppc_inst_val(instr); + + __put_kernel_nofault(patch_addr, &val, u32, failed); + } else { + u64 val = ppc_inst_as_u64(instr); + + __put_kernel_nofault(patch_addr, &val, u64, failed); + } asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), "r" (exec_addr)); From 164dc6ce368fa23b0aae0e5d12883fff9bf80458 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:57:05 +0000 Subject: [PATCH 019/302] powerpc/net: Switch csum_and_copy_{to/from}_user to user_access block Use user_access_begin() instead of the might_sleep/access_ok/allow_access sequence. 
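The resulting user access block, also used by the following patches, has this general shape (illustrative sketch of the pattern, not part of this patch):

	if (unlikely(!user_read_access_begin(uaddr, size)))
		return -EFAULT;
	unsafe_get_user(val, uaddr, Efault);
	user_read_access_end();
	return 0;
Efault:
	user_read_access_end();
	return -EFAULT;
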
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2dee286d2d6dc9a27d99e31ac564bad4fae2cb49.1615398498.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/checksum_wrappers.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c index b895166afc8283..f3999cbb2fcc4c 100644 --- a/arch/powerpc/lib/checksum_wrappers.c +++ b/arch/powerpc/lib/checksum_wrappers.c @@ -16,16 +16,12 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst, { __wsum csum; - might_sleep(); - - if (unlikely(!access_ok(src, len))) + if (unlikely(!user_read_access_begin(src, len))) return 0; - allow_read_from_user(src, len); - csum = csum_partial_copy_generic((void __force *)src, dst, len); - prevent_read_from_user(src, len); + user_read_access_end(); return csum; } EXPORT_SYMBOL(csum_and_copy_from_user); @@ -34,15 +30,12 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len) { __wsum csum; - might_sleep(); - if (unlikely(!access_ok(dst, len))) + if (unlikely(!user_write_access_begin(dst, len))) return 0; - allow_write_to_user(dst, len); - csum = csum_partial_copy_generic(src, (void __force *)dst, len); - prevent_write_to_user(dst, len); + user_write_access_end(); return csum; } EXPORT_SYMBOL(csum_and_copy_to_user); From 870779f40e99c795ddfafa0dfc43318e51f15127 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:57:06 +0000 Subject: [PATCH 020/302] powerpc/futex: Switch to user_access block Use user_access_begin() instead of the access_ok/allow_access sequence. This brings the missing might_fault() check. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6cd202cdc4f939d47822e4ddd3c0856210431a58.1615398498.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/futex.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index e93ee3202e4c17..b3001f8b2c1e20 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -33,9 +33,8 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, { int oldval = 0, ret; - if (!access_ok(uaddr, sizeof(u32))) + if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; - allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); switch (op) { case FUTEX_OP_SET: @@ -56,10 +55,10 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, default: ret = -ENOSYS; } + user_access_end(); *oval = oldval; - prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); return ret; } @@ -70,11 +69,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, int ret = 0; u32 prev; - if (!access_ok(uaddr, sizeof(u32))) + if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; - allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); - __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ @@ -93,8 +90,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT) : "cc", "memory"); + user_access_end(); + *uval = prev; - prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); return ret; } From 93c043e393af7fa218c928d8c62396ba28f1bb84 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:57:07 +0000 Subject: [PATCH 021/302] powerpc/ptrace: Convert gpr32_set_common() to user 
access block Use user access block in gpr32_set_common() instead of repetitive __get_user() which imply repetitive KUAP open/close. To get it clean, force inlining of the small set of tiny functions called inside the block. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bdcb8652c3bb4ab5b8b3bfd08147434be8fc04c9.1615398498.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ptrace.h | 2 +- arch/powerpc/kernel/ptrace/ptrace-view.c | 30 ++++++++++++++---------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 1499e928ea6a62..bedbca062f0353 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -222,7 +222,7 @@ do { \ } while (0) #endif /* __powerpc64__ */ -static inline void set_trap(struct pt_regs *regs, unsigned long val) +static __always_inline void set_trap(struct pt_regs *regs, unsigned long val) { regs->trap = (regs->trap & TRAP_FLAGS_MASK) | (val & ~TRAP_FLAGS_MASK); } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 2bad8068f598c6..0923c94f684e96 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -111,7 +111,7 @@ static unsigned long get_user_msr(struct task_struct *task) return task->thread.regs->msr | task->thread.fpexc_mode; } -static int set_user_msr(struct task_struct *task, unsigned long msr) +static __always_inline int set_user_msr(struct task_struct *task, unsigned long msr) { task->thread.regs->msr &= ~MSR_DEBUGCHANGE; task->thread.regs->msr |= msr & MSR_DEBUGCHANGE; @@ -147,7 +147,7 @@ static int set_user_dscr(struct task_struct *task, unsigned long dscr) * We prevent mucking around with the reserved area of trap * which are used internally by the kernel. 
*/ -static int set_user_trap(struct task_struct *task, unsigned long trap) +static __always_inline int set_user_trap(struct task_struct *task, unsigned long trap) { set_trap(task->thread.regs, trap); return 0; @@ -661,6 +661,9 @@ int gpr32_set_common(struct task_struct *target, const compat_ulong_t __user *u = ubuf; compat_ulong_t reg; + if (!kbuf && !user_read_access_begin(u, count)) + return -EFAULT; + pos /= sizeof(reg); count /= sizeof(reg); @@ -669,8 +672,7 @@ int gpr32_set_common(struct task_struct *target, regs[pos++] = *k++; else for (; count > 0 && pos < PT_MSR; --count) { - if (__get_user(reg, u++)) - return -EFAULT; + unsafe_get_user(reg, u++, Efault); regs[pos++] = reg; } @@ -678,8 +680,8 @@ int gpr32_set_common(struct task_struct *target, if (count > 0 && pos == PT_MSR) { if (kbuf) reg = *k++; - else if (__get_user(reg, u++)) - return -EFAULT; + else + unsafe_get_user(reg, u++, Efault); set_user_msr(target, reg); ++pos; --count; @@ -692,24 +694,24 @@ int gpr32_set_common(struct task_struct *target, ++k; } else { for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { - if (__get_user(reg, u++)) - return -EFAULT; + unsafe_get_user(reg, u++, Efault); regs[pos++] = reg; } for (; count > 0 && pos < PT_TRAP; --count, ++pos) - if (__get_user(reg, u++)) - return -EFAULT; + unsafe_get_user(reg, u++, Efault); } if (count > 0 && pos == PT_TRAP) { if (kbuf) reg = *k++; - else if (__get_user(reg, u++)) - return -EFAULT; + else + unsafe_get_user(reg, u++, Efault); set_user_trap(target, reg); ++pos; --count; } + if (!kbuf) + user_read_access_end(); kbuf = k; ubuf = u; @@ -717,6 +719,10 @@ int gpr32_set_common(struct task_struct *target, count *= sizeof(reg); return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, (PT_TRAP + 1) * sizeof(reg), -1); + +Efault: + user_read_access_end(); + return -EFAULT; } static int gpr32_get(struct task_struct *target, From 2bf3604c415c9d75311141b8eb6ac8780ef74420 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Mar 2021 17:59:48 -0800 Subject: [PATCH 022/302] powerpc/spinlock: Define smp_mb__after_spinlock only once Instead of both queued and simple spinlocks doing it. Move it into the arch's spinlock.h. 
Signed-off-by: Davidlohr Bueso Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210309015950.27688-2-dave@stgolabs.net --- arch/powerpc/include/asm/qspinlock.h | 2 -- arch/powerpc/include/asm/simple_spinlock.h | 3 --- arch/powerpc/include/asm/spinlock.h | 3 +++ 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index b752d34517b39d..3ce1a0bee4fe9f 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -44,8 +44,6 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) } #define queued_spin_lock queued_spin_lock -#define smp_mb__after_spinlock() smp_mb() - static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { /* diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h index 5b862de29dff6c..da5d40cb8de0d2 100644 --- a/arch/powerpc/include/asm/simple_spinlock.h +++ b/arch/powerpc/include/asm/simple_spinlock.h @@ -282,7 +282,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) #define arch_read_relax(lock) rw_yield(lock) #define arch_write_relax(lock) rw_yield(lock) -/* See include/linux/spinlock.h */ -#define smp_mb__after_spinlock() smp_mb() - #endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_H */ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 6ec72282888dc2..bd75872a6334a4 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -10,6 +10,9 @@ #include #endif +/* See include/linux/spinlock.h */ +#define smp_mb__after_spinlock() smp_mb() + #ifndef CONFIG_PARAVIRT_SPINLOCKS static inline void pv_spinlocks_init(void) { } #endif From 66f60522138c2e0d8a3518edd4979df11a2d7525 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Mar 2021 17:59:49 -0800 Subject: [PATCH 023/302] powerpc/spinlock: Unserialize spin_is_locked c6f5d02b6a0f (locking/spinlocks/arm64: Remove smp_mb() from arch_spin_is_locked()) made it pretty official that the call semantics do not imply any sort of barriers, and any user that gets creative must explicitly do any serialization. This creativity, however, is nowadays pretty limited: 1. spin_unlock_wait() has been removed from the kernel in favor of a lock/unlock combo. Furthermore, queued spinlocks have now for a number of years no longer relied on _Q_LOCKED_VAL for the call, but any non-zero value to indicate a locked state. There were cases where the delayed locked store could lead to breaking mutual exclusion with crossed locking; such as with sysv ipc and netfilter being the most extreme. 2. The auditing Andrea did in verified that remaining spin_is_locked() no longer rely on such semantics. Most callers just use it to assert a lock is taken, in a debug nature. The only user that gets cute is NOLOCK qdisc, as of: 96009c7d500e (sched: replace __QDISC_STATE_RUNNING bit with a spin lock) ... which ironically went in the next day after c6f5d02b6a0f. This change replaces test_bit() with spin_is_locked() to know whether to take the busylock heuristic to reduce contention on the main qdisc lock. So any races against spin_is_locked() for archs that use LL/SC for spin_lock() will be benign and not break any mutual exclusion; furthermore, both the seqlock and busylock have the same scope. 
Signed-off-by: Davidlohr Bueso Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210309015950.27688-3-dave@stgolabs.net --- arch/powerpc/include/asm/qspinlock.h | 12 ------------ arch/powerpc/include/asm/simple_spinlock.h | 3 +-- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 3ce1a0bee4fe9f..b052b062481628 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -44,18 +44,6 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) } #define queued_spin_lock queued_spin_lock -static __always_inline int queued_spin_is_locked(struct qspinlock *lock) -{ - /* - * This barrier was added to simple spinlocks by commit 51d7d5205d338, - * but it should now be possible to remove it, asm arm64 has done with - * commit c6f5d02b6a0f. - */ - smp_mb(); - return atomic_read(&lock->val); -} -#define queued_spin_is_locked queued_spin_is_locked - #ifdef CONFIG_PARAVIRT_SPINLOCKS #define SPIN_THRESHOLD (1<<15) /* not tuned */ diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h index da5d40cb8de0d2..552f325412cc7e 100644 --- a/arch/powerpc/include/asm/simple_spinlock.h +++ b/arch/powerpc/include/asm/simple_spinlock.h @@ -38,8 +38,7 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - smp_mb(); - return !arch_spin_value_unlocked(*lock); + return !arch_spin_value_unlocked(READ_ONCE(*lock)); } /* From deb9b13eb2571fbde164ae012c77985fd14f2f02 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Mar 2021 17:59:50 -0800 Subject: [PATCH 024/302] powerpc/qspinlock: Use generic smp_cond_load_relaxed 49a7d46a06c3 (powerpc: Implement smp_cond_load_relaxed()) added busy-waiting pausing with a preferred SMT priority pattern, lowering the priority (reducing decode cycles) during the whole loop slowpath. However, data shows that while this pattern works well with simple spinlocks, queued spinlocks benefit more being kept in medium priority, with a cpu_relax() instead, being a low+medium combo on powerpc. Data is from three benchmarks on a Power9: 9008-22L 64 CPUs with 2 sockets and 8 threads per core. 1. locktorture. This is data for the lowest and most artificial/pathological level, with increasing thread counts pounding on the lock. Metrics are total ops/minute. Despite some small hits in the 4-8 range, scenarios are either neutral or favorable to this patch. +=========+==========+==========+=======+ | # tasks | vanilla | dirty | %diff | +=========+==========+==========+=======+ | 2 | 46718565 | 48751350 | 4.35 | +---------+----------+----------+-------+ | 4 | 51740198 | 50369082 | -2.65 | +---------+----------+----------+-------+ | 8 | 63756510 | 62568821 | -1.86 | +---------+----------+----------+-------+ | 16 | 67824531 | 70966546 | 4.63 | +---------+----------+----------+-------+ | 32 | 53843519 | 61155508 | 13.58 | +---------+----------+----------+-------+ | 64 | 53005778 | 53104412 | 0.18 | +---------+----------+----------+-------+ | 128 | 53331980 | 54606910 | 2.39 | +=========+==========+==========+=======+ 2. sockperf (tcp throughput) Here a client will do one-way throughput tests to a localhost server, with increasing message sizes, dealing with the sk_lock. 
This patch shows to put the performance of the qspinlock back to par with that of the simple lock: simple-spinlock vanilla dirty Hmean 14 73.50 ( 0.00%) 54.44 * -25.93%* 73.45 * -0.07%* Hmean 100 654.47 ( 0.00%) 385.61 * -41.08%* 771.43 * 17.87%* Hmean 300 2719.39 ( 0.00%) 2181.67 * -19.77%* 2666.50 * -1.94%* Hmean 500 4400.59 ( 0.00%) 3390.77 * -22.95%* 4322.14 * -1.78%* Hmean 850 6726.21 ( 0.00%) 5264.03 * -21.74%* 6863.12 * 2.04%* 3. dbench (tmpfs) Configured to run with up to ncpusx8 clients, it shows both latency and throughput metrics. For the latency, with the exception of the 64 case, there is really nothing to go by: vanilla dirty Amean latency-1 1.67 ( 0.00%) 1.67 * 0.09%* Amean latency-2 2.15 ( 0.00%) 2.08 * 3.36%* Amean latency-4 2.50 ( 0.00%) 2.56 * -2.27%* Amean latency-8 2.49 ( 0.00%) 2.48 * 0.31%* Amean latency-16 2.69 ( 0.00%) 2.72 * -1.37%* Amean latency-32 2.96 ( 0.00%) 3.04 * -2.60%* Amean latency-64 7.78 ( 0.00%) 8.17 * -5.07%* Amean latency-512 186.91 ( 0.00%) 186.41 * 0.27%* For the dbench4 Throughput (misleading but traditional) there's a small but rather constant improvement: vanilla dirty Hmean 1 849.13 ( 0.00%) 851.51 * 0.28%* Hmean 2 1664.03 ( 0.00%) 1663.94 * -0.01%* Hmean 4 3073.70 ( 0.00%) 3104.29 * 1.00%* Hmean 8 5624.02 ( 0.00%) 5694.16 * 1.25%* Hmean 16 9169.49 ( 0.00%) 9324.43 * 1.69%* Hmean 32 11969.37 ( 0.00%) 12127.09 * 1.32%* Hmean 64 15021.12 ( 0.00%) 15243.14 * 1.48%* Hmean 512 14891.27 ( 0.00%) 15162.11 * 1.82%* Measuring the dbench4 Per-VFS Operation latency, shows some very minor differences within the noise level, around the 0-1% ranges. Fixes: 49a7d46a06c3 ("powerpc: Implement smp_cond_load_relaxed()") Acked-by: Nicholas Piggin Signed-off-by: Davidlohr Bueso Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210318204702.71417-1-dave@stgolabs.net --- arch/powerpc/include/asm/barrier.h | 16 ---------------- arch/powerpc/include/asm/qspinlock.h | 7 +++++++ 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index aecfde829d5da4..7ae29cfb06c07f 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -80,22 +80,6 @@ do { \ ___p1; \ }) -#ifdef CONFIG_PPC64 -#define smp_cond_load_relaxed(ptr, cond_expr) ({ \ - typeof(ptr) __PTR = (ptr); \ - __unqual_scalar_typeof(*ptr) VAL; \ - VAL = READ_ONCE(*__PTR); \ - if (unlikely(!(cond_expr))) { \ - spin_begin(); \ - do { \ - VAL = READ_ONCE(*__PTR); \ - } while (!(cond_expr)); \ - spin_end(); \ - } \ - (typeof(*ptr))VAL; \ -}) -#endif - #ifdef CONFIG_PPC_BOOK3S_64 #define NOSPEC_BARRIER_SLOT nop #elif defined(CONFIG_PPC_FSL_BOOK3E) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index b052b062481628..07318bc63e3d03 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -72,6 +72,13 @@ static inline void pv_spinlocks_init(void) #endif +/* + * Queued spinlocks rely heavily on smp_cond_load_relaxed() to busy-wait, + * which was found to have performance problems if implemented with + * the preferred spin_begin()/spin_end() SMT priority pattern. Use the + * generic version instead. + */ + #include #endif /* _ASM_POWERPC_QSPINLOCK_H */ From 9466c1799fa2acb68e505a264dcdf53779101ac6 Mon Sep 17 00:00:00 2001 From: "Christopher M. 
Riedl" Date: Fri, 26 Feb 2021 19:12:50 -0600 Subject: [PATCH 025/302] powerpc/uaccess: Add unsafe_copy_from_user() Use the same approach as unsafe_copy_to_user() but instead call unsafe_get_user() in a loop. Signed-off-by: Christopher M. Riedl Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210227011259.11992-2-cmr@codefail.de --- arch/powerpc/include/asm/uaccess.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 479cb30eabd715..c3d3d178fa0e14 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -486,6 +486,27 @@ user_write_access_begin(const void __user *ptr, size_t len) #define unsafe_put_user(x, p, e) \ __unsafe_put_user_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e) +#define unsafe_copy_from_user(d, s, l, e) \ +do { \ + u8 *_dst = (u8 *)(d); \ + const u8 __user *_src = (const u8 __user *)(s); \ + size_t _len = (l); \ + int _i; \ + \ + for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long)) \ + unsafe_get_user(*(long *)(_dst + _i), (long __user *)(_src + _i), e); \ + if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) { \ + unsafe_get_user(*(u32 *)(_dst + _i), (u32 __user *)(_src + _i), e); \ + _i += 4; \ + } \ + if (_len & 2) { \ + unsafe_get_user(*(u16 *)(_dst + _i), (u16 __user *)(_src + _i), e); \ + _i += 2; \ + } \ + if (_len & 1) \ + unsafe_get_user(*(u8 *)(_dst + _i), (u8 __user *)(_src + _i), e); \ +} while (0) + #define unsafe_copy_to_user(d, s, l, e) \ do { \ u8 __user *_dst = (u8 __user *)(d); \ From 609355dfc88e2921bfcbd879300d482a9a33378e Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Fri, 26 Feb 2021 19:12:51 -0600 Subject: [PATCH 026/302] powerpc/signal: Add unsafe_copy_{vsx, fpr}_from_user() Reuse the "safe" implementation from signal.c but call unsafe_get_user() directly in a loop to avoid the intermediate copy into a local buffer. Signed-off-by: Christopher M. 
Riedl Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210227011259.11992-3-cmr@codefail.de --- arch/powerpc/kernel/signal.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 2559a681536eaa..d8dd76b1dc9404 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -53,6 +53,26 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); &buf[i], label);\ } while (0) +#define unsafe_copy_fpr_from_user(task, from, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)from; \ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1; i++) \ + unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label); \ + unsafe_get_user(__t->thread.fp_state.fpscr, &buf[i], label); \ +} while (0) + +#define unsafe_copy_vsx_from_user(task, from, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)from; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + unsafe_get_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label); \ +} while (0) + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM #define unsafe_copy_ckfpr_to_user(to, task, label) do { \ struct task_struct *__t = task; \ @@ -80,6 +100,10 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); unsafe_copy_to_user(to, (task)->thread.fp_state.fpr, \ ELF_NFPREG * sizeof(double), label) +#define unsafe_copy_fpr_from_user(task, from, label) \ + unsafe_copy_from_user((task)->thread.fp_state.fpr, from, \ + ELF_NFPREG * sizeof(double), label) + static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) { @@ -115,6 +139,8 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from) #else #define unsafe_copy_fpr_to_user(to, task, label) do { } while (0) +#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0) + static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) { From c6c9645e37483444ec5182373455b2f22e4b1535 Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Fri, 26 Feb 2021 19:12:52 -0600 Subject: [PATCH 027/302] powerpc/signal64: Remove non-inline calls from setup_sigcontext() The majority of setup_sigcontext() can be refactored to execute in an "unsafe" context assuming an open uaccess window except for some non-inline function calls. Move these out into a separate prepare_setup_sigcontext() function which must be called first and before opening up a uaccess window. Non-inline function calls should be avoided during a uaccess window for a few reasons: - KUAP should be enabled for as much kernel code as possible. Opening a uaccess window disables KUAP which means any code executed during this time contributes to a potential attack surface. - Non-inline functions default to traceable which means they are instrumented for ftrace. This adds more code which could run with KUAP disabled. - Powerpc does not currently support the objtool UACCESS checks. All code running with uaccess must be audited manually which means: less code -> less work -> fewer problems (in theory). A follow-up commit converts setup_sigcontext() to be "unsafe". Signed-off-by: Christopher M. 
Riedl Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210227011259.11992-4-cmr@codefail.de --- arch/powerpc/kernel/signal_64.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index f9e4a1ac440fb0..6ca546192cbfbe 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -79,6 +79,24 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc) } #endif +static void prepare_setup_sigcontext(struct task_struct *tsk) +{ +#ifdef CONFIG_ALTIVEC + /* save altivec registers */ + if (tsk->thread.used_vr) + flush_altivec_to_thread(tsk); + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + tsk->thread.vrsave = mfspr(SPRN_VRSAVE); +#endif /* CONFIG_ALTIVEC */ + + flush_fp_to_thread(tsk); + +#ifdef CONFIG_VSX + if (tsk->thread.used_vsr) + flush_vsx_to_thread(tsk); +#endif /* CONFIG_VSX */ +} + /* * Set up the sigcontext for the signal frame. */ @@ -97,7 +115,6 @@ static long setup_sigcontext(struct sigcontext __user *sc, */ #ifdef CONFIG_ALTIVEC elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc); - unsigned long vrsave; #endif struct pt_regs *regs = tsk->thread.regs; unsigned long msr = regs->msr; @@ -112,7 +129,6 @@ static long setup_sigcontext(struct sigcontext __user *sc, /* save altivec registers */ if (tsk->thread.used_vr) { - flush_altivec_to_thread(tsk); /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ err |= __copy_to_user(v_regs, &tsk->thread.vr_state, 33 * sizeof(vector128)); @@ -124,17 +140,10 @@ static long setup_sigcontext(struct sigcontext __user *sc, /* We always copy to/from vrsave, it's 0 if we don't have or don't * use altivec. */ - vrsave = 0; - if (cpu_has_feature(CPU_FTR_ALTIVEC)) { - vrsave = mfspr(SPRN_VRSAVE); - tsk->thread.vrsave = vrsave; - } - - err |= __put_user(vrsave, (u32 __user *)&v_regs[33]); + err |= __put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]); #else /* CONFIG_ALTIVEC */ err |= __put_user(0, &sc->v_regs); #endif /* CONFIG_ALTIVEC */ - flush_fp_to_thread(tsk); /* copy fpr regs and fpscr */ err |= copy_fpr_to_user(&sc->fp_regs, tsk); @@ -150,7 +159,6 @@ static long setup_sigcontext(struct sigcontext __user *sc, * VMX data. */ if (tsk->thread.used_vsr && ctx_has_vsx_region) { - flush_vsx_to_thread(tsk); v_regs += ELF_NVRREG; err |= copy_vsx_to_user(v_regs, tsk); /* set MSR_VSX in the MSR value in the frame to @@ -655,6 +663,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, ctx_has_vsx_region = 1; if (old_ctx != NULL) { + prepare_setup_sigcontext(current); if (!access_ok(old_ctx, ctx_size) || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0, ctx_has_vsx_region) @@ -842,6 +851,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, #endif { err |= __put_user(0, &frame->uc.uc_link); + prepare_setup_sigcontext(tsk); err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig, NULL, (unsigned long)ksig->ka.sa.sa_handler, 1); From 1a130b67c682be9842f188f593c2080786de4204 Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Fri, 26 Feb 2021 19:12:53 -0600 Subject: [PATCH 028/302] powerpc: Reference parameter in MSR_TM_ACTIVE() macro Unlike the other MSR_TM_* macros, MSR_TM_ACTIVE does not reference or use its parameter unless CONFIG_PPC_TRANSACTIONAL_MEM is defined. This causes an 'unused variable' compile warning unless the variable is also guarded with CONFIG_PPC_TRANSACTIONAL_MEM. 
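A minimal sketch of the warning, assuming CONFIG_PPC_TRANSACTIONAL_MEM=n so that MSR_TM_ACTIVE() expands to plain 0 (hypothetical caller, not a hunk from this patch; it only mirrors the pattern used by callers such as handle_rt_signal64()):

	unsigned long msr = regs->msr;	/* assigned here... */

	if (MSR_TM_ACTIVE(msr))		/* ...but never read: the old macro drops 'msr' */
		regs->msr &= ~MSR_TS_MASK;

With the old definition, gcc typically reports something like "variable 'msr' set but not used" for such callers unless the declaration itself is wrapped in the same #ifdef.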
Reference but do nothing with the argument in the macro to avoid a potential compile warning. Signed-off-by: Christopher M. Riedl Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210227011259.11992-5-cmr@codefail.de --- arch/powerpc/include/asm/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index da103e92c1126a..1be20bc8dce2f5 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -124,7 +124,7 @@ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM #define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */ #else -#define MSR_TM_ACTIVE(x) 0 +#define MSR_TM_ACTIVE(x) ((void)(x), 0) #endif #if defined(CONFIG_PPC_BOOK3S_64) From 2d19630e20fe5fbd5813f73fd5b1c81ddec61369 Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Fri, 26 Feb 2021 19:12:54 -0600 Subject: [PATCH 029/302] powerpc/signal64: Remove TM ifdefery in middle of if/else block Both rt_sigreturn() and handle_rt_signal_64() contain TM-related ifdefs which break-up an if/else block. Provide stubs for the ifdef-guarded TM functions and remove the need for an ifdef in rt_sigreturn(). Rework the remaining TM ifdef in handle_rt_signal64() similar to commit f1cf4f93de2f ("powerpc/signal32: Remove ifdefery in middle of if/else"). Unlike in the commit for ppc32, the ifdef can't be removed entirely since uc_transact in sigframe depends on CONFIG_PPC_TRANSACTIONAL_MEM. Signed-off-by: Christopher M. Riedl Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210227011259.11992-6-cmr@codefail.de --- arch/powerpc/kernel/process.c | 3 +- arch/powerpc/kernel/signal_64.c | 102 ++++++++++++++++---------------- 2 files changed, 54 insertions(+), 51 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3231c2df9e261f..afb334dfb6a89f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1117,9 +1117,10 @@ void restore_tm_state(struct pt_regs *regs) regs->msr |= msr_diff; } -#else +#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */ #define tm_recheckpoint_new_task(new) #define __switch_to_tm(prev, new) +void tm_reclaim_current(uint8_t cause) {} #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ static inline void save_sprs(struct thread_struct *t) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 6ca546192cbfbe..bd8d210c91157c 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -594,6 +594,12 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, return err; } +#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */ +static long restore_tm_sigcontexts(struct task_struct *tsk, struct sigcontext __user *sc, + struct sigcontext __user *tm_sc) +{ + return -EINVAL; +} #endif /* @@ -710,9 +716,7 @@ SYSCALL_DEFINE0(rt_sigreturn) struct pt_regs *regs = current_pt_regs(); struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1]; sigset_t set; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM unsigned long msr; -#endif /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -724,48 +728,50 @@ SYSCALL_DEFINE0(rt_sigreturn) goto badframe; set_current_blocked(&set); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* - * If there is a transactional state then throw it away. - * The purpose of a sigreturn is to destroy all traces of the - * signal frame, this includes any transactional state created - * within in. 
We only check for suspended as we can never be - * active in the kernel, we are active, there is nothing better to - * do than go ahead and Bad Thing later. - * The cause is not important as there will never be a - * recheckpoint so it's not user visible. - */ - if (MSR_TM_SUSPENDED(mfmsr())) - tm_reclaim_current(0); + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM)) { + /* + * If there is a transactional state then throw it away. + * The purpose of a sigreturn is to destroy all traces of the + * signal frame, this includes any transactional state created + * within in. We only check for suspended as we can never be + * active in the kernel, we are active, there is nothing better to + * do than go ahead and Bad Thing later. + * The cause is not important as there will never be a + * recheckpoint so it's not user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); - /* - * Disable MSR[TS] bit also, so, if there is an exception in the - * code below (as a page fault in copy_ckvsx_to_user()), it does - * not recheckpoint this task if there was a context switch inside - * the exception. - * - * A major page fault can indirectly call schedule(). A reschedule - * process in the middle of an exception can have a side effect - * (Changing the CPU MSR[TS] state), since schedule() is called - * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended - * (switch_to() calls tm_recheckpoint() for the 'new' process). In - * this case, the process continues to be the same in the CPU, but - * the CPU state just changed. - * - * This can cause a TM Bad Thing, since the MSR in the stack will - * have the MSR[TS]=0, and this is what will be used to RFID. - * - * Clearing MSR[TS] state here will avoid a recheckpoint if there - * is any process reschedule in kernel space. The MSR[TS] state - * does not need to be saved also, since it will be replaced with - * the MSR[TS] that came from user context later, at - * restore_tm_sigcontexts. - */ - regs->msr &= ~MSR_TS_MASK; + /* + * Disable MSR[TS] bit also, so, if there is an exception in the + * code below (as a page fault in copy_ckvsx_to_user()), it does + * not recheckpoint this task if there was a context switch inside + * the exception. + * + * A major page fault can indirectly call schedule(). A reschedule + * process in the middle of an exception can have a side effect + * (Changing the CPU MSR[TS] state), since schedule() is called + * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended + * (switch_to() calls tm_recheckpoint() for the 'new' process). In + * this case, the process continues to be the same in the CPU, but + * the CPU state just changed. + * + * This can cause a TM Bad Thing, since the MSR in the stack will + * have the MSR[TS]=0, and this is what will be used to RFID. + * + * Clearing MSR[TS] state here will avoid a recheckpoint if there + * is any process reschedule in kernel space. The MSR[TS] state + * does not need to be saved also, since it will be replaced with + * the MSR[TS] that came from user context later, at + * restore_tm_sigcontexts. + */ + regs->msr &= ~MSR_TS_MASK; - if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) - goto badframe; - if (MSR_TM_ACTIVE(msr)) { + if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) + goto badframe; + } + + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && MSR_TM_ACTIVE(msr)) { /* We recheckpoint on return. 
*/ struct ucontext __user *uc_transact; @@ -778,9 +784,7 @@ SYSCALL_DEFINE0(rt_sigreturn) if (restore_tm_sigcontexts(current, &uc->uc_mcontext, &uc_transact->uc_mcontext)) goto badframe; - } else -#endif - { + } else { /* * Fall through, for non-TM restore * @@ -818,10 +822,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsigned long newsp = 0; long err = 0; struct pt_regs *regs = tsk->thread.regs; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* Save the thread's msr before get_tm_stackpointer() changes it */ unsigned long msr = regs->msr; -#endif frame = get_sigframe(ksig, tsk, sizeof(*frame), 0); if (!access_ok(frame, sizeof(*frame))) @@ -836,8 +838,9 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(msr)) { +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* The ucontext_t passed to userland points to the second * ucontext_t (for transactional state) with its uc_link ptr. */ @@ -847,9 +850,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, tsk, ksig->sig, NULL, (unsigned long)ksig->ka.sa.sa_handler, msr); - } else #endif - { + } else { err |= __put_user(0, &frame->uc.uc_link); prepare_setup_sigcontext(tsk); err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig, From 7bb081c8f043ab166f8c6f26fca744821217dad7 Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Fri, 26 Feb 2021 19:12:55 -0600 Subject: [PATCH 030/302] powerpc/signal64: Replace setup_sigcontext() w/ unsafe_setup_sigcontext() Previously setup_sigcontext() performed a costly KUAP switch on every uaccess operation. These repeated uaccess switches cause a significant drop in signal handling performance. Rewrite setup_sigcontext() to assume that a userspace write access window is open by replacing all uaccess functions with their 'unsafe' versions. Modify the callers to first open, call unsafe_setup_sigcontext() and then close the uaccess window. Signed-off-by: Christopher M. Riedl Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210227011259.11992-7-cmr@codefail.de --- arch/powerpc/kernel/signal_64.c | 72 ++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index bd8d210c91157c..78ae4bb4e5904b 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -101,9 +101,14 @@ static void prepare_setup_sigcontext(struct task_struct *tsk) * Set up the sigcontext for the signal frame. 
*/ -static long setup_sigcontext(struct sigcontext __user *sc, - struct task_struct *tsk, int signr, sigset_t *set, - unsigned long handler, int ctx_has_vsx_region) +#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region, label)\ +do { \ + if (__unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region))\ + goto label; \ +} while (0) +static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc, + struct task_struct *tsk, int signr, sigset_t *set, + unsigned long handler, int ctx_has_vsx_region) { /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the * process never used altivec yet (MSR_VEC is zero in pt_regs of @@ -118,20 +123,19 @@ static long setup_sigcontext(struct sigcontext __user *sc, #endif struct pt_regs *regs = tsk->thread.regs; unsigned long msr = regs->msr; - long err = 0; /* Force usr to alway see softe as 1 (interrupts enabled) */ unsigned long softe = 0x1; BUG_ON(tsk != current); #ifdef CONFIG_ALTIVEC - err |= __put_user(v_regs, &sc->v_regs); + unsafe_put_user(v_regs, &sc->v_regs, efault_out); /* save altivec registers */ if (tsk->thread.used_vr) { /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ - err |= __copy_to_user(v_regs, &tsk->thread.vr_state, - 33 * sizeof(vector128)); + unsafe_copy_to_user(v_regs, &tsk->thread.vr_state, + 33 * sizeof(vector128), efault_out); /* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg) * contains valid data. */ @@ -140,12 +144,12 @@ static long setup_sigcontext(struct sigcontext __user *sc, /* We always copy to/from vrsave, it's 0 if we don't have or don't * use altivec. */ - err |= __put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]); + unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out); #else /* CONFIG_ALTIVEC */ - err |= __put_user(0, &sc->v_regs); + unsafe_put_user(0, &sc->v_regs, efault_out); #endif /* CONFIG_ALTIVEC */ /* copy fpr regs and fpscr */ - err |= copy_fpr_to_user(&sc->fp_regs, tsk); + unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out); /* * Clear the MSR VSX bit to indicate there is no valid state attached @@ -160,24 +164,27 @@ static long setup_sigcontext(struct sigcontext __user *sc, */ if (tsk->thread.used_vsr && ctx_has_vsx_region) { v_regs += ELF_NVRREG; - err |= copy_vsx_to_user(v_regs, tsk); + unsafe_copy_vsx_to_user(v_regs, tsk, efault_out); /* set MSR_VSX in the MSR value in the frame to * indicate that sc->vs_reg) contains valid data. 
*/ msr |= MSR_VSX; } #endif /* CONFIG_VSX */ - err |= __put_user(&sc->gp_regs, &sc->regs); + unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out); WARN_ON(!FULL_REGS(regs)); - err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE); - err |= __put_user(msr, &sc->gp_regs[PT_MSR]); - err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]); - err |= __put_user(signr, &sc->signal); - err |= __put_user(handler, &sc->handler); + unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out); + unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out); + unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out); + unsafe_put_user(signr, &sc->signal, efault_out); + unsafe_put_user(handler, &sc->handler, efault_out); if (set != NULL) - err |= __put_user(set->sig[0], &sc->oldmask); + unsafe_put_user(set->sig[0], &sc->oldmask, efault_out); - return err; + return 0; + +efault_out: + return -EFAULT; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -670,12 +677,15 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, if (old_ctx != NULL) { prepare_setup_sigcontext(current); - if (!access_ok(old_ctx, ctx_size) - || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0, - ctx_has_vsx_region) - || __copy_to_user(&old_ctx->uc_sigmask, - ¤t->blocked, sizeof(sigset_t))) + if (!user_write_access_begin(old_ctx, ctx_size)) return -EFAULT; + + unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, + 0, ctx_has_vsx_region, efault_out); + unsafe_copy_to_user(&old_ctx->uc_sigmask, ¤t->blocked, + sizeof(sigset_t), efault_out); + + user_write_access_end(); } if (new_ctx == NULL) return 0; @@ -704,6 +714,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, /* This returns like rt_sigreturn */ set_thread_flag(TIF_RESTOREALL); return 0; + +efault_out: + user_write_access_end(); + return -EFAULT; } @@ -854,9 +868,13 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, } else { err |= __put_user(0, &frame->uc.uc_link); prepare_setup_sigcontext(tsk); - err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig, - NULL, (unsigned long)ksig->ka.sa.sa_handler, - 1); + if (!user_write_access_begin(&frame->uc.uc_mcontext, + sizeof(frame->uc.uc_mcontext))) + return -EFAULT; + err |= __unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, + ksig->sig, NULL, + (unsigned long)ksig->ka.sa.sa_handler, 1); + user_write_access_end(); } err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) From 193323e1009437c0885240e75ca71f7963e4a006 Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Fri, 26 Feb 2021 19:12:56 -0600 Subject: [PATCH 031/302] powerpc/signal64: Replace restore_sigcontext() w/ unsafe_restore_sigcontext() Previously restore_sigcontext() performed a costly KUAP switch on every uaccess operation. These repeated uaccess switches cause a significant drop in signal handling performance. Rewrite restore_sigcontext() to assume that a userspace read access window is open by replacing all uaccess functions with their 'unsafe' versions. Modify the callers to first open, call unsafe_restore_sigcontext(), and then close the uaccess window. Signed-off-by: Christopher M. 
Riedl Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210227011259.11992-8-cmr@codefail.de --- arch/powerpc/kernel/signal_64.c | 68 ++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 78ae4bb4e5904b..23a44ec3ac01b9 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -327,14 +327,16 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, /* * Restore the sigcontext from the signal frame. */ - -static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, - struct sigcontext __user *sc) +#define unsafe_restore_sigcontext(tsk, set, sig, sc, label) do { \ + if (__unsafe_restore_sigcontext(tsk, set, sig, sc)) \ + goto label; \ +} while (0) +static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_t *set, + int sig, struct sigcontext __user *sc) { #ifdef CONFIG_ALTIVEC elf_vrreg_t __user *v_regs; #endif - unsigned long err = 0; unsigned long save_r13 = 0; unsigned long msr; struct pt_regs *regs = tsk->thread.regs; @@ -349,27 +351,27 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, save_r13 = regs->gpr[13]; /* copy the GPRs */ - err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr)); - err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]); + unsafe_copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr), efault_out); + unsafe_get_user(regs->nip, &sc->gp_regs[PT_NIP], efault_out); /* get MSR separately, transfer the LE bit if doing signal return */ - err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out); if (sig) regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); - err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]); - err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]); - err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]); - err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]); - err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]); + unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out); + unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out); + unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out); + unsafe_get_user(regs->xer, &sc->gp_regs[PT_XER], efault_out); + unsafe_get_user(regs->ccr, &sc->gp_regs[PT_CCR], efault_out); /* Don't allow userspace to set SOFTE */ set_trap_norestart(regs); - err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); - err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); - err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); + unsafe_get_user(regs->dar, &sc->gp_regs[PT_DAR], efault_out); + unsafe_get_user(regs->dsisr, &sc->gp_regs[PT_DSISR], efault_out); + unsafe_get_user(regs->result, &sc->gp_regs[PT_RESULT], efault_out); if (!sig) regs->gpr[13] = save_r13; if (set != NULL) - err |= __get_user(set->sig[0], &sc->oldmask); + unsafe_get_user(set->sig[0], &sc->oldmask, efault_out); /* * Force reload of FP/VEC. 
@@ -379,29 +381,27 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); #ifdef CONFIG_ALTIVEC - err |= __get_user(v_regs, &sc->v_regs); - if (err) - return err; + unsafe_get_user(v_regs, &sc->v_regs, efault_out); if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128))) return -EFAULT; /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ if (v_regs != NULL && (msr & MSR_VEC) != 0) { - err |= __copy_from_user(&tsk->thread.vr_state, v_regs, - 33 * sizeof(vector128)); + unsafe_copy_from_user(&tsk->thread.vr_state, v_regs, + 33 * sizeof(vector128), efault_out); tsk->thread.used_vr = true; } else if (tsk->thread.used_vr) { memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128)); } /* Always get VRSAVE back */ if (v_regs != NULL) - err |= __get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]); + unsafe_get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out); else tsk->thread.vrsave = 0; if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, tsk->thread.vrsave); #endif /* CONFIG_ALTIVEC */ /* restore floating point */ - err |= copy_fpr_from_user(tsk, &sc->fp_regs); + unsafe_copy_fpr_from_user(tsk, &sc->fp_regs, efault_out); #ifdef CONFIG_VSX /* * Get additional VSX data. Update v_regs to point after the @@ -410,14 +410,17 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, */ v_regs += ELF_NVRREG; if ((msr & MSR_VSX) != 0) { - err |= copy_vsx_from_user(tsk, v_regs); + unsafe_copy_vsx_from_user(tsk, v_regs, efault_out); tsk->thread.used_vsr = true; } else { for (i = 0; i < 32 ; i++) tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; } #endif - return err; + return 0; + +efault_out: + return -EFAULT; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -708,8 +711,14 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set))) do_exit(SIGSEGV); set_current_blocked(&set); - if (restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) + + if (!user_read_access_begin(new_ctx, ctx_size)) + return -EFAULT; + if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) { + user_read_access_end(); do_exit(SIGSEGV); + } + user_read_access_end(); /* This returns like rt_sigreturn */ set_thread_flag(TIF_RESTOREALL); @@ -812,8 +821,13 @@ SYSCALL_DEFINE0(rt_sigreturn) * causing a TM bad thing. */ current->thread.regs->msr &= ~MSR_TS_MASK; - if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) + if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext))) + return -EFAULT; + if (__unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) { + user_read_access_end(); goto badframe; + } + user_read_access_end(); } if (restore_altstack(&uc->uc_stack)) From 96d7a4e06fab9fbc4f67c563af65b073902f3e61 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 26 Feb 2021 19:12:57 -0600 Subject: [PATCH 032/302] powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess switches Add uaccess blocks and use the 'unsafe' versions of functions doing user access where possible to reduce the number of times uaccess has to be opened/closed. There is no 'unsafe' version of copy_siginfo_to_user, so move it slightly to allow for a "longer" uaccess block. Co-developed-by: Christopher M. Riedl Signed-off-by: Daniel Axtens Signed-off-by: Christopher M. 
Riedl Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210227011259.11992-9-cmr@codefail.de --- arch/powerpc/kernel/signal_64.c | 57 +++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 23a44ec3ac01b9..057507138320fd 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -854,45 +854,53 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsigned long msr = regs->msr; frame = get_sigframe(ksig, tsk, sizeof(*frame), 0); - if (!access_ok(frame, sizeof(*frame))) - goto badframe; - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, &ksig->info); - if (err) + /* + * This only applies when calling unsafe_setup_sigcontext() and must be + * called before opening the uaccess window. + */ + if (!MSR_TM_ACTIVE(msr)) + prepare_setup_sigcontext(tsk); + + if (!user_write_access_begin(frame, sizeof(*frame))) goto badframe; + unsafe_put_user(&frame->info, &frame->pinfo, badframe_block); + unsafe_put_user(&frame->uc, &frame->puc, badframe_block); + /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); - err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]); + unsafe_put_user(0, &frame->uc.uc_flags, badframe_block); + unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block); if (MSR_TM_ACTIVE(msr)) { #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* The ucontext_t passed to userland points to the second * ucontext_t (for transactional state) with its uc_link ptr. */ - err |= __put_user(&frame->uc_transact, &frame->uc.uc_link); + unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block); + + user_write_access_end(); + err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, &frame->uc_transact.uc_mcontext, tsk, ksig->sig, NULL, (unsigned long)ksig->ka.sa.sa_handler, msr); + + if (!user_write_access_begin(&frame->uc.uc_sigmask, + sizeof(frame->uc.uc_sigmask))) + goto badframe; + #endif } else { - err |= __put_user(0, &frame->uc.uc_link); - prepare_setup_sigcontext(tsk); - if (!user_write_access_begin(&frame->uc.uc_mcontext, - sizeof(frame->uc.uc_mcontext))) - return -EFAULT; - err |= __unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, - ksig->sig, NULL, - (unsigned long)ksig->ka.sa.sa_handler, 1); - user_write_access_end(); + unsafe_put_user(0, &frame->uc.uc_link, badframe_block); + unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig, + NULL, (unsigned long)ksig->ka.sa.sa_handler, + 1, badframe_block); } - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - goto badframe; + + unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block); + user_write_access_end(); /* Make sure signal handler doesn't get spurious FP exceptions */ tsk->thread.fp_state.fpscr = 0; @@ -907,6 +915,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, regs->nip = (unsigned long) &frame->tramp[0]; } + + /* Save the siginfo outside of the unsafe block. */ + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + goto badframe; + /* Allocate a dummy caller frame for the signal handler. 
*/ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); @@ -946,6 +959,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, return 0; +badframe_block: + user_write_access_end(); badframe: signal_fault(current, regs, "handle_rt_signal64", frame); From 0f92433b8f9f76608528101e7a81cd3bfd00e236 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 26 Feb 2021 19:12:58 -0600 Subject: [PATCH 033/302] powerpc/signal64: Rewrite rt_sigreturn() to minimise uaccess switches Add uaccess blocks and use the 'unsafe' versions of functions doing user access where possible to reduce the number of times uaccess has to be opened/closed. Co-developed-by: Christopher M. Riedl Signed-off-by: Daniel Axtens Signed-off-by: Christopher M. Riedl Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210227011259.11992-10-cmr@codefail.de --- arch/powerpc/kernel/signal_64.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 057507138320fd..e7d612e8236385 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -822,11 +822,11 @@ SYSCALL_DEFINE0(rt_sigreturn) */ current->thread.regs->msr &= ~MSR_TS_MASK; if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext))) - return -EFAULT; - if (__unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) { - user_read_access_end(); goto badframe; - } + + unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext, + badframe_block); + user_read_access_end(); } @@ -836,6 +836,8 @@ SYSCALL_DEFINE0(rt_sigreturn) set_thread_flag(TIF_RESTOREALL); return 0; +badframe_block: + user_read_access_end(); badframe: signal_fault(current, regs, "rt_sigreturn", uc); From d3ccc9781560af051554017c702631560bdc0811 Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Fri, 26 Feb 2021 19:12:59 -0600 Subject: [PATCH 034/302] powerpc/signal: Use __get_user() to copy sigset_t Usually sigset_t is exactly 8B which is a "trivial" size and does not warrant using __copy_from_user(). Use __get_user() directly in anticipation of future work to remove the trivial size optimizations from __copy_from_user(). The ppc32 implementation of get_sigset_t() previously called copy_from_user() which, unlike __copy_from_user(), calls access_ok(). Replacing this w/ __get_user() (no access_ok()) is fine here since both callsites in signal_32.c are preceded by an earlier access_ok(). Signed-off-by: Christopher M. 
Riedl Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210227011259.11992-11-cmr@codefail.de --- arch/powerpc/kernel/signal.h | 7 +++++++ arch/powerpc/kernel/signal_32.c | 2 +- arch/powerpc/kernel/signal_64.c | 4 ++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index d8dd76b1dc9404..1393876f38143f 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -19,6 +19,13 @@ extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk); +static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src) +{ + BUILD_BUG_ON(sizeof(sigset_t) != sizeof(u64)); + + return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]); +} + #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, struct task_struct *task); diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 75ee918a120a5e..c505b444a6131e 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -144,7 +144,7 @@ static inline int restore_general_regs(struct pt_regs *regs, static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) { - return copy_from_user(set, uset, sizeof(*uset)); + return __get_user_sigset(set, uset); } #define to_user_ptr(p) ((unsigned long)(p)) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index e7d612e8236385..e10459f11f8e03 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -708,7 +708,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, * We kill the task with a SIGSEGV in this situation. */ - if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set))) + if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) do_exit(SIGSEGV); set_current_blocked(&set); @@ -747,7 +747,7 @@ SYSCALL_DEFINE0(rt_sigreturn) if (!access_ok(uc, sizeof(*uc))) goto badframe; - if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) + if (__get_user_sigset(&set, &uc->uc_sigmask)) goto badframe; set_current_blocked(&set); From d943bc742a6aabc578b6b62a713ceedf8bf16623 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 11 Mar 2021 14:45:38 +0530 Subject: [PATCH 035/302] powerpc/uprobes: Validation for prefixed instruction As per ISA 3.1, prefixed instruction should not cross 64-byte boundary. So don't allow Uprobe on such prefixed instruction. There are two ways probed instruction is changed in mapped pages. First, when Uprobe is activated, it searches for all the relevant pages and replace instruction in them. In this case, if that probe is on the 64-byte unaligned prefixed instruction, error out directly. Second, when Uprobe is already active and user maps a relevant page via mmap(), instruction is replaced via mmap() code path. But because Uprobe is invalid, entire mmap() operation can not be stopped. In this case just print an error and continue. Signed-off-by: Ravi Bangoria Acked-by: Naveen N. 
Rao Acked-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210311091538.368590-1-ravi.bangoria@linux.ibm.com --- arch/powerpc/kernel/uprobes.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index e8a63713e65542..186f69b11e94d2 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -41,6 +41,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, if (addr & 0x03) return -EINVAL; + if (cpu_has_feature(CPU_FTR_ARCH_31) && + ppc_inst_prefixed(auprobe->insn) && + (addr & 0x3f) == 60) { + pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n"); + return -EINVAL; + } + return 0; } From 2d9f69bc5a5a75579b410beb0dc3d313be762c9f Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Wed, 10 Mar 2021 18:44:05 +0100 Subject: [PATCH 036/302] cxl: don't manipulate the mm.mm_users field directly It is better to rely on the API provided by the MM layer instead of directly manipulating the mm_users field. Signed-off-by: Laurent Dufour Acked-by: Frederic Barrat Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210310174405.51044-1-ldufour@linux.ibm.com --- drivers/misc/cxl/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index 01153b74334a1b..60c829113299bd 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -200,7 +200,7 @@ static struct mm_struct *get_mem_context(struct cxl_context *ctx) if (ctx->mm == NULL) return NULL; - if (!atomic_inc_not_zero(&ctx->mm->mm_users)) + if (!mmget_not_zero(ctx->mm)) return NULL; return ctx->mm; From a58cbed68315111c663f35603a42547f72acd6f8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:10 +0000 Subject: [PATCH 037/302] powerpc/traps: Declare unrecoverable_exception() as __noreturn unrecoverable_exception() is never expected to return, most callers have an infiniteloop in case it returns. Ensure it really never returns by terminating it with a BUG(), and declare it __no_return. It always GCC to really simplify functions calling it. In the exemple below, it avoids the stack frame in the likely fast path and avoids code duplication for the exit. With this patch: 00000348 : 348: 81 43 00 84 lwz r10,132(r3) 34c: 71 48 00 02 andi. r8,r10,2 350: 41 82 00 2c beq 37c 354: 71 4a 40 00 andi. r10,r10,16384 358: 40 82 00 20 bne 378 35c: 80 62 00 70 lwz r3,112(r2) 360: 74 63 00 01 andis. r3,r3,1 364: 40 82 00 28 bne 38c 368: 7d 40 00 a6 mfmsr r10 36c: 7c 11 13 a6 mtspr 81,r0 370: 7c 12 13 a6 mtspr 82,r0 374: 4e 80 00 20 blr 378: 48 00 00 00 b 378 37c: 94 21 ff f0 stwu r1,-16(r1) 380: 7c 08 02 a6 mflr r0 384: 90 01 00 14 stw r0,20(r1) 388: 48 00 00 01 bl 388 388: R_PPC_REL24 unrecoverable_exception 38c: 38 e2 00 70 addi r7,r2,112 390: 3d 00 00 01 lis r8,1 394: 7c c0 38 28 lwarx r6,0,r7 398: 7c c6 40 78 andc r6,r6,r8 39c: 7c c0 39 2d stwcx. r6,0,r7 3a0: 40 a2 ff f4 bne 394 3a4: 38 60 00 01 li r3,1 3a8: 4b ff ff c0 b 368 Without this patch: 00000348 : 348: 94 21 ff f0 stwu r1,-16(r1) 34c: 93 e1 00 0c stw r31,12(r1) 350: 7c 7f 1b 78 mr r31,r3 354: 81 23 00 84 lwz r9,132(r3) 358: 71 2a 00 02 andi. r10,r9,2 35c: 41 82 00 34 beq 390 360: 71 29 40 00 andi. r9,r9,16384 364: 40 82 00 28 bne 38c 368: 80 62 00 70 lwz r3,112(r2) 36c: 74 63 00 01 andis. 
r3,r3,1 370: 40 82 00 3c bne 3ac 374: 7d 20 00 a6 mfmsr r9 378: 7c 11 13 a6 mtspr 81,r0 37c: 7c 12 13 a6 mtspr 82,r0 380: 83 e1 00 0c lwz r31,12(r1) 384: 38 21 00 10 addi r1,r1,16 388: 4e 80 00 20 blr 38c: 48 00 00 00 b 38c 390: 7c 08 02 a6 mflr r0 394: 90 01 00 14 stw r0,20(r1) 398: 48 00 00 01 bl 398 398: R_PPC_REL24 unrecoverable_exception 39c: 80 01 00 14 lwz r0,20(r1) 3a0: 81 3f 00 84 lwz r9,132(r31) 3a4: 7c 08 03 a6 mtlr r0 3a8: 4b ff ff b8 b 360 3ac: 39 02 00 70 addi r8,r2,112 3b0: 3d 40 00 01 lis r10,1 3b4: 7c e0 40 28 lwarx r7,0,r8 3b8: 7c e7 50 78 andc r7,r7,r10 3bc: 7c e0 41 2d stwcx. r7,0,r8 3c0: 40 a2 ff f4 bne 3b4 3c4: 38 60 00 01 li r3,1 3c8: 4b ff ff ac b 374 Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1e883e9d93fdb256853d1434c8ad77c257349b2d.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 2 +- arch/powerpc/kernel/traps.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index e8d09a841373b7..232a4847f5969f 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -436,7 +436,7 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode); DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); -void unrecoverable_exception(struct pt_regs *regs); +void __noreturn unrecoverable_exception(struct pt_regs *regs); void replay_system_reset(void); void replay_soft_interrupts(void); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 97914ee2fdc950..bb1387351b8f76 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2169,11 +2169,14 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) * in the MSR is 0. This indicates that SRR0/1 are live, and that * we therefore lost state by taking this exception. */ -void unrecoverable_exception(struct pt_regs *regs) +void __noreturn unrecoverable_exception(struct pt_regs *regs) { pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n", regs->trap, regs->nip, regs->msr); die("Unrecoverable exception", regs, SIGABRT); + /* die() should not return */ + for (;;) + ; } #if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x) From 52ae92cc290f0506eef9ad5466bb453ce4a9e80e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:11 +0000 Subject: [PATCH 038/302] powerpc/40x: Don't use SPRN_SPRG_SCRATCH0/1 in TLB miss handlers SPRN_SPRG_SCRATCH5 is used to save SPRN_PID. SPRN_SPRG_SCRATCH6 is already available. SPRN_PID is only 8 bits. We have r12 that contains CR. We only need to preserve CR0, so we have space available in r12 to save PID. Keep PID in r12 and free up SPRN_SPRG_SCRATCH5. Then In TLB miss handlers, instead of using SPRN_SPRG_SCRATCH0 and SPRN_SPRG_SCRATCH1, use SPRN_SPRG_SCRATCH5 and SPRN_SPRG_SCRATCH6 to avoid future conflicts with normal exception prologs. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4cdaa85d38e14d594ba902424060ec55babf2c42.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_40x.S | 39 ++++++++++++++++------------------ 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 24724a7dad49a6..383238a98f7765 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -249,13 +249,13 @@ _ENTRY(saved_ksp_limit) * load TLB entries from the page table if they exist. */ START_EXCEPTION(0x1100, DTLBMiss) - mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH1, r11 + mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */ + mtspr SPRN_SPRG_SCRATCH6, r11 mtspr SPRN_SPRG_SCRATCH3, r12 mtspr SPRN_SPRG_SCRATCH4, r9 mfcr r12 mfspr r9, SPRN_PID - mtspr SPRN_SPRG_SCRATCH5, r9 + rlwimi r12, r9, 0, 0xff mfspr r10, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -316,13 +316,12 @@ _ENTRY(saved_ksp_limit) /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. */ - mfspr r9, SPRN_SPRG_SCRATCH5 - mtspr SPRN_PID, r9 - mtcr r12 + mtspr SPRN_PID, r12 + mtcrf 0x80, r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH6 + mfspr r10, SPRN_SPRG_SCRATCH5 b DataStorage /* 0x1200 - Instruction TLB Miss Exception @@ -330,13 +329,13 @@ _ENTRY(saved_ksp_limit) * registers and bailout to a different point. */ START_EXCEPTION(0x1200, ITLBMiss) - mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH1, r11 + mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */ + mtspr SPRN_SPRG_SCRATCH6, r11 mtspr SPRN_SPRG_SCRATCH3, r12 mtspr SPRN_SPRG_SCRATCH4, r9 mfcr r12 mfspr r9, SPRN_PID - mtspr SPRN_SPRG_SCRATCH5, r9 + rlwimi r12, r9, 0, 0xff mfspr r10, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -397,13 +396,12 @@ _ENTRY(saved_ksp_limit) /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. */ - mfspr r9, SPRN_SPRG_SCRATCH5 - mtspr SPRN_PID, r9 - mtcr r12 + mtspr SPRN_PID, r12 + mtcrf 0x80, r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH6 + mfspr r10, SPRN_SPRG_SCRATCH5 b InstructionAccess EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD) @@ -543,13 +541,12 @@ finish_tlb_load: /* Done...restore registers and get out of here. */ - mfspr r9, SPRN_SPRG_SCRATCH5 - mtspr SPRN_PID, r9 - mtcr r12 + mtspr SPRN_PID, r12 + mtcrf 0x80, r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH6 + mfspr r10, SPRN_SPRG_SCRATCH5 rfi /* Should sync shadow TLBs */ b . /* prevent prefetch past rfi */ From 9d3c18a11a930afe65d33527300a42e0872c744d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:12 +0000 Subject: [PATCH 039/302] powerpc/40x: Change CRITICAL_EXCEPTION_PROLOG macro to a gas macro Change CRITICAL_EXCEPTION_PROLOG macro to a gas macro to remove the ugly ; and \ on each line. 
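In miniature, the conversion looks like this, truncated here to the first two instructions of the real macro (the full hunk follows below):

	/* before: C preprocessor macro, every line needs a trailing ';' and '\' */
	#define CRITICAL_EXCEPTION_PROLOG	\
		stw	r10,crit_r10@l(0);	\
		stw	r11,crit_r11@l(0)

	/* after: gas macro, plain assembly lines between .macro and .endm */
	.macro CRITICAL_EXCEPTION_PROLOG
		stw	r10,crit_r10@l(0)
		stw	r11,crit_r11@l(0)
	.endm

The generated code is unchanged; the gain is purely in readability and in easier maintenance of the multi-line prolog.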
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/73291fb9dc9ec58182c27a40dfc3db204e3f4024.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_40x.S | 71 +++++++++++++++++----------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 383238a98f7765..9cef423d574b47 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -100,42 +100,43 @@ _ENTRY(saved_ksp_limit) * Instead we use a couple of words of memory at low physical addresses. * This is OK since we don't support SMP on these processors. */ -#define CRITICAL_EXCEPTION_PROLOG \ - stw r10,crit_r10@l(0); /* save two registers to work with */\ - stw r11,crit_r11@l(0); \ - mfcr r10; /* save CR in r10 for now */\ - mfspr r11,SPRN_SRR3; /* check whether user or kernel */\ - andi. r11,r11,MSR_PR; \ - lis r11,critirq_ctx@ha; \ - tophys(r11,r11); \ - lwz r11,critirq_ctx@l(r11); \ - beq 1f; \ - /* COMING FROM USER MODE */ \ - mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ - lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\ -1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\ - tophys(r11,r11); \ - stw r10,_CCR(r11); /* save various registers */\ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mflr r10; \ - stw r10,_LINK(r11); \ - mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\ - stw r12,_DEAR(r11); /* since they may have had stuff */\ - mfspr r9,SPRN_ESR; /* in them at the point where the */\ - stw r9,_ESR(r11); /* exception was taken */\ - mfspr r12,SPRN_SRR2; \ - stw r1,GPR1(r11); \ - mfspr r9,SPRN_SRR3; \ - stw r1,0(r11); \ - tovirt(r1,r11); \ - rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ - stw r0,GPR0(r11); \ - lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\ - addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ - stw r10, 8(r11); \ - SAVE_4GPRS(3, r11); \ +.macro CRITICAL_EXCEPTION_PROLOG + stw r10,crit_r10@l(0) /* save two registers to work with */ + stw r11,crit_r11@l(0) + mfcr r10 /* save CR in r10 for now */ + mfspr r11,SPRN_SRR3 /* check whether user or kernel */ + andi. r11,r11,MSR_PR + lis r11,critirq_ctx@ha + tophys(r11,r11) + lwz r11,critirq_ctx@l(r11) + beq 1f + /* COMING FROM USER MODE */ + mfspr r11,SPRN_SPRG_THREAD /* if from user, start at top of */ + lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */ +1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ + tophys(r11,r11) + stw r10,_CCR(r11) /* save various registers */ + stw r12,GPR12(r11) + stw r9,GPR9(r11) + mflr r10 + stw r10,_LINK(r11) + mfspr r12,SPRN_DEAR /* save DEAR and ESR in the frame */ + stw r12,_DEAR(r11) /* since they may have had stuff */ + mfspr r9,SPRN_ESR /* in them at the point where the */ + stw r9,_ESR(r11) /* exception was taken */ + mfspr r12,SPRN_SRR2 + stw r1,GPR1(r11) + mfspr r9,SPRN_SRR3 + stw r1,0(r11) + tovirt(r1,r11) + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ + stw r0,GPR0(r11) + lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + addi r10, r10, STACK_FRAME_REGS_MARKER@l + stw r10, 8(r11) + SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) +.endm /* * State at this point: From fcd4b43c36c69aa41e79a511edbb06c7020a6061 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:13 +0000 Subject: [PATCH 040/302] powerpc/40x: Save SRR0/SRR1 and r10/r11 earlier in critical exception In order to be able to switch MMU on in exception prolog, save SRR0 and SRR1 earlier. Also save r10 and r11 into stack earlier to better match with the normal exception prolog. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/79a93f253d72dc97ac968c9c62b5066960b688ed.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 9 --------- arch/powerpc/kernel/head_40x.S | 8 ++++++++ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 78c430b7f9d943..8528b4c7f9d3e7 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -107,15 +107,6 @@ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #ifdef CONFIG_40x .globl crit_transfer_to_handler crit_transfer_to_handler: - lwz r0,crit_r10@l(0) - stw r0,GPR10(r11) - lwz r0,crit_r11@l(0) - stw r0,GPR11(r11) - mfspr r0,SPRN_SRR0 - stw r0,crit_srr0@l(0) - mfspr r0,SPRN_SRR1 - stw r0,crit_srr1@l(0) - /* set the stack limit to the current stack */ mfspr r8,SPRN_SPRG_THREAD lwz r0,KSP_LIMIT(r8) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 9cef423d574b47..067ae1302c1ccd 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -103,6 +103,10 @@ _ENTRY(saved_ksp_limit) .macro CRITICAL_EXCEPTION_PROLOG stw r10,crit_r10@l(0) /* save two registers to work with */ stw r11,crit_r11@l(0) + mfspr r10,SPRN_SRR0 + mfspr r11,SPRN_SRR1 + stw r10,crit_srr0@l(0) + stw r11,crit_srr1@l(0) mfcr r10 /* save CR in r10 for now */ mfspr r11,SPRN_SRR3 /* check whether user or kernel */ andi. r11,r11,MSR_PR @@ -120,6 +124,10 @@ _ENTRY(saved_ksp_limit) stw r9,GPR9(r11) mflr r10 stw r10,_LINK(r11) + lwz r10,crit_r10@l(0) + lwz r12,crit_r11@l(0) + stw r10,GPR10(r11) + stw r12,GPR11(r11) mfspr r12,SPRN_DEAR /* save DEAR and ESR in the frame */ stw r12,_DEAR(r11) /* since they may have had stuff */ mfspr r9,SPRN_ESR /* in them at the point where the */ From 26c468860c32022ffe9caf16691764b77fb8eead Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:14 +0000 Subject: [PATCH 041/302] powerpc/40x: Reorder a few instructions in critical exception prolog In order to ease preparation for CONFIG_VMAP_STACK, reorder a few instruction, especially save r1 into stack frame earlier. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c895ecf958c86d1736bdd2ff6f36626b55f35fd2.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_40x.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 067ae1302c1ccd..5b337bf49bcbd4 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -119,6 +119,9 @@ _ENTRY(saved_ksp_limit) lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */ 1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ tophys(r11,r11) + stw r1,GPR1(r11) + stw r1,0(r11) + tovirt(r1,r11) stw r10,_CCR(r11) /* save various registers */ stw r12,GPR12(r11) stw r9,GPR9(r11) @@ -129,14 +132,11 @@ _ENTRY(saved_ksp_limit) stw r10,GPR10(r11) stw r12,GPR11(r11) mfspr r12,SPRN_DEAR /* save DEAR and ESR in the frame */ - stw r12,_DEAR(r11) /* since they may have had stuff */ mfspr r9,SPRN_ESR /* in them at the point where the */ + stw r12,_DEAR(r11) /* since they may have had stuff */ stw r9,_ESR(r11) /* exception was taken */ mfspr r12,SPRN_SRR2 - stw r1,GPR1(r11) mfspr r9,SPRN_SRR3 - stw r1,0(r11) - tovirt(r1,r11) rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ stw r0,GPR0(r11) lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ From 0fc1e93481f67a49f67c9168b71842eeb0998b25 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:15 +0000 Subject: [PATCH 042/302] powerpc/40x: Prepare for enabling MMU in critical exception prolog In order the enable MMU early in exception prolog, implement CONFIG_VMAP_STACK principles in critical exception prolog. There is no intention to use CONFIG_VMAP_STACK on 40x, but related code will be used to enable MMU early in exception in a later patch. Also address (critirq_ctx - PAGE_OFFSET) directly instead of using tophys() in order to win one instruction. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3fd75ee54c48307119acdbf66cfea966c1463bbd.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_40x.S | 40 +++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 5b337bf49bcbd4..1468f38c3860a4 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -89,6 +89,12 @@ _ENTRY(crit_srr0) .space 4 _ENTRY(crit_srr1) .space 4 +_ENTRY(crit_r1) + .space 4 +_ENTRY(crit_dear) + .space 4 +_ENTRY(crit_esr) + .space 4 _ENTRY(saved_ksp_limit) .space 4 @@ -107,32 +113,60 @@ _ENTRY(saved_ksp_limit) mfspr r11,SPRN_SRR1 stw r10,crit_srr0@l(0) stw r11,crit_srr1@l(0) +#ifdef CONFIG_VMAP_STACK + mfspr r10,SPRN_DEAR + mfspr r11,SPRN_ESR + stw r10,crit_dear@l(0) + stw r11,crit_esr@l(0) +#endif mfcr r10 /* save CR in r10 for now */ mfspr r11,SPRN_SRR3 /* check whether user or kernel */ andi. 
r11,r11,MSR_PR - lis r11,critirq_ctx@ha - tophys(r11,r11) - lwz r11,critirq_ctx@l(r11) + lis r11,(critirq_ctx-PAGE_OFFSET)@ha + lwz r11,(critirq_ctx-PAGE_OFFSET)@l(r11) beq 1f /* COMING FROM USER MODE */ mfspr r11,SPRN_SPRG_THREAD /* if from user, start at top of */ lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */ +#ifdef CONFIG_VMAP_STACK +1: stw r1,crit_r1@l(0) + addi r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ + LOAD_REG_IMMEDIATE(r11,MSR_KERNEL & ~(MSR_IR | MSR_RI)) + mtmsr r11 + isync + lwz r11,crit_r1@l(0) + stw r11,GPR1(r1) + stw r11,0(r1) + mr r11,r1 +#else 1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ tophys(r11,r11) stw r1,GPR1(r11) stw r1,0(r11) tovirt(r1,r11) +#endif stw r10,_CCR(r11) /* save various registers */ stw r12,GPR12(r11) stw r9,GPR9(r11) mflr r10 stw r10,_LINK(r11) +#ifdef CONFIG_VMAP_STACK + lis r9,PAGE_OFFSET@ha + lwz r10,crit_r10@l(r9) + lwz r12,crit_r11@l(r9) +#else lwz r10,crit_r10@l(0) lwz r12,crit_r11@l(0) +#endif stw r10,GPR10(r11) stw r12,GPR11(r11) +#ifdef CONFIG_VMAP_STACK + lwz r12,crit_dear@l(r9) + lwz r9,crit_esr@l(r9) +#else mfspr r12,SPRN_DEAR /* save DEAR and ESR in the frame */ mfspr r9,SPRN_ESR /* in them at the point where the */ +#endif stw r12,_DEAR(r11) /* since they may have had stuff */ stw r9,_ESR(r11) /* exception was taken */ mfspr r12,SPRN_SRR2 From 0512aadd750acf72b8906973c34e7092642d4323 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:16 +0000 Subject: [PATCH 043/302] powerpc/40x: Prepare normal exception handler for enabling MMU early Ensure normal exception handler are able to manage stuff with MMU enabled. For that we use CONFIG_VMAP_STACK related code allthough there is no intention to really activate CONFIG_VMAP_STACK on powerpc 40x for the moment. 40x uses SPRN_DEAR instead of SPRN_DAR and SPRN_ESR instead of SPRN_DSISR. Take it into account in common macros. 40x MSR value doesn't fit on 15 bits, use LOAD_REG_IMMEDIATE() in common macros that will be used also with 40x. 
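The 15-bit limit is simply the reach of a single li instruction, whose immediate is a sign-extended 16-bit field, so the largest non-negative constant it can produce is 0x7fff; anything bigger has to be assembled from a high and a low half, which is what LOAD_REG_IMMEDIATE() expands to. A stand-alone C illustration of that split follows; the constant is made up for the example and is not the actual 40x MSR_KERNEL value.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define LI_MAX 0x7fffu			/* largest non-negative immediate a single li can encode */

static uint32_t build_constant(uint32_t value)
{
	uint32_t hi = value >> 16;	/* high half, think lis */
	uint32_t lo = value & 0xffffu;	/* low half, think ori  */

	return (hi << 16) | lo;
}

int main(void)
{
	uint32_t msr_example = 0x00029032;	/* made-up MSR-like constant, well above 0x7fff */

	assert(msr_example > LI_MAX);		/* so li alone cannot load it */
	assert(build_constant(msr_example) == msr_example);
	printf("built 0x%08x\n", (unsigned)build_constant(msr_example));
	return 0;
}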
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/01963af2b83037bca270d7bf1336ffcf35da8282.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 2 +- arch/powerpc/kernel/head_32.h | 15 ++++++++++++++- arch/powerpc/kernel/head_40x.S | 17 ++++++----------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 8528b4c7f9d3e7..535c55f4393a2a 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -162,7 +162,7 @@ transfer_to_handler: li r12,-1 /* clear all pending debug events */ mtspr SPRN_DBSR,r12 lis r11,global_dbcr0@ha - tophys(r11,r11) + tophys_novmstack r11,r11 addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP lwz r9,TASK_CPU(r2) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 5d4706c1457271..ac6b391f14936a 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -22,9 +22,17 @@ #ifdef CONFIG_VMAP_STACK mfspr r10, SPRN_SPRG_THREAD .if \handle_dar_dsisr +#ifdef CONFIG_40x + mfspr r11, SPRN_DEAR +#else mfspr r11, SPRN_DAR +#endif stw r11, DAR(r10) +#ifdef CONFIG_40x + mfspr r11, SPRN_ESR +#else mfspr r11, SPRN_DSISR +#endif stw r11, DSISR(r10) .endif mfspr r11, SPRN_SRR0 @@ -61,7 +69,7 @@ .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 #ifdef CONFIG_VMAP_STACK - li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take DTLB miss */ mtmsr r11 isync mfspr r11, SPRN_SPRG_SCRATCH2 @@ -158,8 +166,13 @@ .macro save_dar_dsisr_on_stack reg1, reg2, sp #ifndef CONFIG_VMAP_STACK +#ifdef CONFIG_40x + mfspr \reg1, SPRN_DEAR + mfspr \reg2, SPRN_ESR +#else mfspr \reg1, SPRN_DAR mfspr \reg2, SPRN_DSISR +#endif stw \reg1, _DAR(\sp) stw \reg2, _DSISR(\sp) #endif diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 1468f38c3860a4..4bf0aee858eb4a 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -221,11 +221,8 @@ _ENTRY(saved_ksp_limit) * if they can't resolve the lightweight TLB fault. */ START_EXCEPTION(0x0300, DataStorage) - EXCEPTION_PROLOG - mfspr r5, SPRN_ESR /* Grab the ESR, save it */ - stw r5, _ESR(r11) - mfspr r4, SPRN_DEAR /* Grab the DEAR, save it */ - stw r4, _DEAR(r11) + EXCEPTION_PROLOG handle_dar_dsisr=1 + save_dar_dsisr_on_stack r4, r5, r11 EXC_XFER_LITE(0x300, handle_page_fault) /* @@ -244,17 +241,15 @@ _ENTRY(saved_ksp_limit) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) - EXCEPTION_PROLOG - mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ - stw r4,_DEAR(r11) + EXCEPTION_PROLOG handle_dar_dsisr=1 + save_dar_dsisr_on_stack r4, r5, r11 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) - EXCEPTION_PROLOG - mfspr r4,SPRN_ESR /* Grab the ESR and save it */ - stw r4,_ESR(r11) + EXCEPTION_PROLOG handle_dar_dsisr=1 + save_dar_dsisr_on_stack r4, r5, r11 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x700, program_check_exception) From be39e10506830a2e654fb799a48025999f89a6ff Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:17 +0000 Subject: [PATCH 044/302] powerpc/32: Reconcile interrupts in C There is no need for this to be in asm anymore, use the new interrupt entry wrapper. 
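In outline, the interrupt entry wrapper means every handler is generated by a macro that runs common C entry code before the handler body, so per-entry work such as calling trace_hardirqs_off() when the interrupted context had interrupts enabled can live in interrupt_enter_prepare() rather than in the asm prolog. A simplified, self-contained sketch of that shape follows; the struct, the MSR_EE test and the stub hooks are stand-ins for illustration, not the real DEFINE_INTERRUPT_HANDLER machinery.

#include <stdbool.h>
#include <stdio.h>

#define MSR_EE 0x8000ul				/* external interrupt enable bit */

struct pt_regs { unsigned long msr; };		/* stand-in for the saved register frame */

static bool arch_irq_disabled_regs(struct pt_regs *regs)
{
	return !(regs->msr & MSR_EE);
}

static void trace_hardirqs_off(void)		/* stand-in for the lockdep hook */
{
	puts("hardirqs traced off");
}

/* Common C entry code shared by every handler generated below. */
static void interrupt_enter_prepare(struct pt_regs *regs)
{
	if (!arch_irq_disabled_regs(regs))
		trace_hardirqs_off();		/* interrupted context had EE set */
}

/* The wrapper idea: the visible handler runs the common entry code,
 * then calls the body supplied by the user of the macro. */
#define DEFINE_INTERRUPT_HANDLER(func)			\
	static void do_##func(struct pt_regs *regs);	\
	static void func(struct pt_regs *regs)		\
	{						\
		interrupt_enter_prepare(regs);		\
		do_##func(regs);			\
	}						\
	static void do_##func(struct pt_regs *regs)

DEFINE_INTERRUPT_HANDLER(timer_interrupt)
{
	printf("handler body, msr=%#lx\n", regs->msr);
}

int main(void)
{
	struct pt_regs regs = { .msr = MSR_EE };

	timer_interrupt(&regs);
	return 0;
}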
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/602e1ec47e15ca540f7edb9cf6feb6c249911bd6.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 4 ++ arch/powerpc/kernel/entry_32.S | 58 ---------------------------- 2 files changed, 4 insertions(+), 58 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 232a4847f5969f..b2f69e5dcb50bc 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -29,6 +29,10 @@ static inline void booke_restore_dbcr0(void) static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) { +#ifdef CONFIG_PPC32 + if (!arch_irq_disabled_regs(regs)) + trace_hardirqs_off(); +#endif /* * Book3E reconciles irq soft mask in asm */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 535c55f4393a2a..0f18fe14649c66 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -202,22 +202,6 @@ transfer_to_handler_cont: lwz r9,4(r9) /* where to go when done */ #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 -#endif -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * When tracing IRQ state (lockdep) we enable the MMU before we call - * the IRQ tracing functions as they might access vmalloc space or - * perform IOs for console output. - * - * To speed up the syscall path where interrupts stay on, let's check - * first if we are changing the MSR value at all. - */ - tophys_novmstack r12, r1 - lwz r12,_MSR(r12) - andi. r12,r12,MSR_EE - bne 1f - - /* MSR isn't changing, just transition directly */ #endif mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r10 @@ -244,48 +228,6 @@ transfer_to_handler_cont: _ASM_NOKPROBE_SYMBOL(transfer_to_handler) _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) -#ifdef CONFIG_TRACE_IRQFLAGS -1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to - * keep interrupts disabled at this point otherwise we might risk - * taking an interrupt before we tell lockdep they are enabled. - */ - lis r12,reenable_mmu@h - ori r12,r12,reenable_mmu@l - LOAD_REG_IMMEDIATE(r0, MSR_KERNEL) - mtspr SPRN_SRR0,r12 - mtspr SPRN_SRR1,r0 - rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif - -reenable_mmu: - /* - * We save a bunch of GPRs, - * r3 can be different from GPR3(r1) at this point, r9 and r11 - * contains the old MSR and handler address respectively, - * r0, r4-r8, r12, CCR, CTR, XER etc... are left - * clobbered as they aren't useful past this point. - */ - - stwu r1,-32(r1) - stw r9,8(r1) - stw r11,12(r1) - stw r3,16(r1) - - /* If we are disabling interrupts (normal case), simply log it with - * lockdep - */ -1: bl trace_hardirqs_off - lwz r3,16(r1) - lwz r11,12(r1) - lwz r9,8(r1) - addi r1,r1,32 - mtctr r11 - mtlr r9 - bctr /* jump to handler */ -#endif /* CONFIG_TRACE_IRQFLAGS */ - #ifndef CONFIG_VMAP_STACK /* * On kernel stack overflow, load up an initial stack pointer From f93d866e14b746112fb29d69197dd83075bbd28c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:18 +0000 Subject: [PATCH 045/302] powerpc/32: Entry cpu time accounting in C There is no need for this to be in asm, use the new interrupt entry wrapper. 
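The bookkeeping involved is small, which is why it moves to C so easily: on entry from user space, read the timebase, charge the interval since the last user-mode start to user time, and record the start of the kernel interval. A simplified stand-alone sketch of that logic follows; the struct and function names are illustrative, not the kernel's account_cpu_user_entry().

#include <stdio.h>

struct cpu_accounting_data {			/* stand-in for the real accounting area */
	unsigned long long starttime;		/* start of the current kernel interval */
	unsigned long long starttime_user;	/* start of the current user interval   */
	unsigned long long utime;		/* accumulated user time                 */
};

static unsigned long long get_timebase(void)	/* stand-in for mftb */
{
	static unsigned long long fake_tb;

	return fake_tb += 1000;
}

/* The removed asm expressed in C: charge the user interval that just
 * ended and start timing the kernel interval from "now". */
static void account_user_entry_sketch(struct cpu_accounting_data *acct)
{
	unsigned long long tb = get_timebase();

	acct->utime += tb - acct->starttime_user;
	acct->starttime = tb;
}

int main(void)
{
	struct cpu_accounting_data acct = { .starttime_user = 0 };

	account_user_entry_sketch(&acct);
	printf("utime=%llu\n", acct.utime);
	return 0;
}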
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/daca4c3e05cdfe54d237162a0718b3aaca897662.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 3 +++ arch/powerpc/include/asm/ppc_asm.h | 10 ---------- arch/powerpc/kernel/entry_32.S | 1 - 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index b2f69e5dcb50bc..c35368adbe7188 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -32,6 +32,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup #ifdef CONFIG_PPC32 if (!arch_irq_disabled_regs(regs)) trace_hardirqs_off(); + + if (user_mode(regs)) + account_cpu_user_entry(); #endif /* * Book3E reconciles irq soft mask in asm diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 3dceb64fc9af24..8998122fc7e22d 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -23,18 +23,8 @@ */ #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) #define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) #else -#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) \ - MFTB(ra); /* get timebase */ \ - PPC_LL rb, ACCOUNT_STARTTIME_USER(ptr); \ - PPC_STL ra, ACCOUNT_STARTTIME(ptr); \ - subf rb,rb,ra; /* subtract start value */ \ - PPC_LL ra, ACCOUNT_USER_TIME(ptr); \ - add ra,ra,rb; /* add on to user time */ \ - PPC_STL ra, ACCOUNT_USER_TIME(ptr); \ - #define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) \ MFTB(ra); /* get timebase */ \ PPC_LL rb, ACCOUNT_STARTTIME(ptr); \ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0f18fe14649c66..0f3f1bdd909eb8 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -152,7 +152,6 @@ transfer_to_handler: lwz r12,THREAD_DBCR0(r12) andis. r12,r12,DBCR0_IDM@h #endif - ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) #ifdef CONFIG_PPC_BOOK3S_32 kuep_lock r11, r12 #endif From 79f4bb17f18162dd95d6aeb6dc3b7da54d6139aa Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:19 +0000 Subject: [PATCH 046/302] powerpc/32: Handle bookE debugging in C in exception entry The handling of SPRN_DBCR0 and other registers can easily be done in C instead of ASM. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6d6b2497115890b90cfa72a2b3ab1da5f78123c2.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 2 ++ arch/powerpc/kernel/entry_32.S | 23 ----------------------- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index c35368adbe7188..861e6eadc98c41 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -65,6 +65,8 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup if (user_mode(regs)) account_cpu_user_entry(); #endif + + booke_restore_dbcr0(); } /* diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0f3f1bdd909eb8..4ffbcf3df72e9d 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -146,32 +146,9 @@ transfer_to_handler: addi r2, r12, -THREAD addi r11,r1,STACK_FRAME_OVERHEAD stw r11,PT_REGS(r12) -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - /* Check to see if the dbcr0 register is set up to debug. 
Use the - internal debug mode bit to do this. */ - lwz r12,THREAD_DBCR0(r12) - andis. r12,r12,DBCR0_IDM@h -#endif #ifdef CONFIG_PPC_BOOK3S_32 kuep_lock r11, r12 #endif -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - beq+ 3f - /* From user and task is ptraced - load up global dbcr0 */ - li r12,-1 /* clear all pending debug events */ - mtspr SPRN_DBSR,r12 - lis r11,global_dbcr0@ha - tophys_novmstack r11,r11 - addi r11,r11,global_dbcr0@l -#ifdef CONFIG_SMP - lwz r9,TASK_CPU(r2) - slwi r9,r9,2 - add r11,r11,r9 -#endif - lwz r12,0(r11) - mtspr SPRN_DBCR0,r12 -#endif - b 3f 2: /* if from kernel, check interrupted DOZE/NAP mode and From e464d92b292cc61f8f0791cf87d3646204bbb208 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:20 +0000 Subject: [PATCH 047/302] powerpc/32: Use fast instruction to set MSR RI in exception prolog on 8xx 8xx has registers SPRN_NRI, SPRN_EID and SPRN_EIE for changing MSR EE and RI. Use SPRN_EID in exception prolog to set RI. On an 8xx, it reduces the null_syscall test by 3 cycles. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/65f6bda827c2a2abce71ea7e07543e791163da33.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index ac6b391f14936a..25ee6b26ef5a8d 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -107,6 +107,8 @@ #endif #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#elif defined(CONFIG_PPC_8xx) + mtspr SPRN_EID, r2 /* Set MSR_RI */ #else #ifdef CONFIG_VMAP_STACK li r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */ From 5747230645562921b5bc19f6409f7af08fe17c6d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:21 +0000 Subject: [PATCH 048/302] powerpc/32: Remove ksp_limit ksp_limit is there to help detect stack overflows. That is specific to ppc32 as it was removed from ppc64 in commit cbc9565ee826 ("powerpc: Remove ksp_limit on ppc64"). There are other means for detecting stack overflows. As ppc64 has proven to not need it, ppc32 should be able to do without it too. Lets remove it and simplify exception handling. 
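For context, the software check being dropped is morally the comparison sketched below, performed by the 32-bit prolog on every entry from kernel mode; the other means referred to are checks that do not need a per-thread limit, for instance stacks whose overflow faults immediately (as with guarded VMAP stacks) or the alignment test the prolog already does. The sketch uses stand-in types and is illustrative only, not the kernel code.

#include <stdbool.h>
#include <stdio.h>

struct thread_struct_sketch {
	unsigned long ksp;		/* kernel stack pointer on entry      */
	unsigned long ksp_limit;	/* lowest address the stack may reach */
};

/* The test the old asm prolog performed: stack overflowed if ksp <= ksp_limit. */
static bool hit_ksp_limit(const struct thread_struct_sketch *t)
{
	return t->ksp <= t->ksp_limit;
}

int main(void)
{
	struct thread_struct_sketch ok  = { .ksp = 0x3000, .ksp_limit = 0x2000 };
	struct thread_struct_sketch bad = { .ksp = 0x1ff0, .ksp_limit = 0x2000 };

	printf("ok:  overflow=%d\n", hit_ksp_limit(&ok));
	printf("bad: overflow=%d\n", hit_ksp_limit(&bad));
	return 0;
}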
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d789c3385b22e07bedc997613c0d26074cb513e7.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/processor.h | 2 - arch/powerpc/kernel/asm-offsets.c | 2 - arch/powerpc/kernel/entry_32.S | 68 +--------------------------- arch/powerpc/kernel/head_40x.S | 2 - arch/powerpc/kernel/head_booke.h | 1 - arch/powerpc/kernel/misc_32.S | 14 ------ arch/powerpc/kernel/process.c | 3 -- arch/powerpc/kernel/traps.c | 9 ---- arch/powerpc/lib/sstep.c | 9 ---- 9 files changed, 2 insertions(+), 108 deletions(-) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8acc3590c9712b..43cbd9281055a3 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -144,7 +144,6 @@ struct thread_struct { #endif #ifdef CONFIG_PPC32 void *pgdir; /* root of page-table tree */ - unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ #ifdef CONFIG_PPC_RTAS unsigned long rtas_sp; /* stack pointer for when in RTAS */ #endif @@ -282,7 +281,6 @@ struct thread_struct { #ifdef CONFIG_PPC32 #define INIT_THREAD { \ .ksp = INIT_SP, \ - .ksp_limit = INIT_SP_LIMIT, \ .pgdir = swapper_pg_dir, \ .fpexc_mode = MSR_FE0 | MSR_FE1, \ SPEFSCR_INIT \ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f3a662201a9fbd..73620536c80101 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -91,7 +91,6 @@ int main(void) DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); #else - OFFSET(KSP_LIMIT, thread_struct, ksp_limit); #ifdef CONFIG_PPC_RTAS OFFSET(RTAS_SP, thread_struct, rtas_sp); #endif @@ -381,7 +380,6 @@ int main(void) DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1)); DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0)); DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1)); - DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit)); #endif #endif diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 4ffbcf3df72e9d..66198e6e25e7a0 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -94,12 +94,6 @@ crit_transfer_to_handler: mfspr r0,SPRN_SRR1 stw r0,_SRR1(r11) - /* set the stack limit to the current stack */ - mfspr r8,SPRN_SPRG_THREAD - lwz r0,KSP_LIMIT(r8) - stw r0,SAVED_KSP_LIMIT(r11) - rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) - stw r0,KSP_LIMIT(r8) /* fall through */ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #endif @@ -107,12 +101,6 @@ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #ifdef CONFIG_40x .globl crit_transfer_to_handler crit_transfer_to_handler: - /* set the stack limit to the current stack */ - mfspr r8,SPRN_SPRG_THREAD - lwz r0,KSP_LIMIT(r8) - stw r0,saved_ksp_limit@l(0) - rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) - stw r0,KSP_LIMIT(r8) /* fall through */ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #endif @@ -151,17 +139,10 @@ transfer_to_handler: #endif b 3f -2: /* if from kernel, check interrupted DOZE/NAP mode and - * check for stack overflow - */ + /* if from kernel, check interrupted DOZE/NAP mode */ +2: kuap_save_and_lock r11, r12, r9, r2, r6 addi r2, r12, -THREAD -#ifndef CONFIG_VMAP_STACK - lwz r9,KSP_LIMIT(r12) - cmplw r1,r9 /* if r1 <= ksp_limit */ - ble- stack_ovf /* then the kernel stack overflowed */ -#endif -5: #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) lwz 
r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 @@ -204,37 +185,6 @@ transfer_to_handler_cont: _ASM_NOKPROBE_SYMBOL(transfer_to_handler) _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) -#ifndef CONFIG_VMAP_STACK -/* - * On kernel stack overflow, load up an initial stack pointer - * and call StackOverflow(regs), which should not return. - */ -stack_ovf: - /* sometimes we use a statically-allocated stack, which is OK. */ - lis r12,_end@h - ori r12,r12,_end@l - cmplw r1,r12 - ble 5b /* r1 <= &_end is OK */ - SAVE_NVGPRS(r11) - addi r3,r1,STACK_FRAME_OVERHEAD - lis r1,init_thread_union@ha - addi r1,r1,init_thread_union@l - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD - lis r9,StackOverflow@ha - addi r9,r9,StackOverflow@l - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif - mtspr SPRN_SRR0,r9 - mtspr SPRN_SRR1,r10 - rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif -_ASM_NOKPROBE_SYMBOL(stack_ovf) -#endif - .globl transfer_to_syscall transfer_to_syscall: SAVE_NVGPRS(r1) @@ -815,11 +765,6 @@ _ASM_NOKPROBE_SYMBOL(exc_exit_restart) #ifdef CONFIG_40x .globl ret_from_crit_exc ret_from_crit_exc: - mfspr r9,SPRN_SPRG_THREAD - lis r10,saved_ksp_limit@ha; - lwz r10,saved_ksp_limit@l(r10); - tovirt(r9,r9); - stw r10,KSP_LIMIT(r9) lis r9,crit_srr0@ha; lwz r9,crit_srr0@l(r9); lis r10,crit_srr1@ha; @@ -833,9 +778,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) #ifdef CONFIG_BOOKE .globl ret_from_crit_exc ret_from_crit_exc: - mfspr r9,SPRN_SPRG_THREAD - lwz r10,SAVED_KSP_LIMIT(r1) - stw r10,KSP_LIMIT(r9) RESTORE_xSRR(SRR0,SRR1); RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) @@ -843,9 +785,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) .globl ret_from_debug_exc ret_from_debug_exc: - mfspr r9,SPRN_SPRG_THREAD - lwz r10,SAVED_KSP_LIMIT(r1) - stw r10,KSP_LIMIT(r9) RESTORE_xSRR(SRR0,SRR1); RESTORE_xSRR(CSRR0,CSRR1); RESTORE_MMU_REGS; @@ -854,9 +793,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_debug_exc) .globl ret_from_mcheck_exc ret_from_mcheck_exc: - mfspr r9,SPRN_SPRG_THREAD - lwz r10,SAVED_KSP_LIMIT(r1) - stw r10,KSP_LIMIT(r9) RESTORE_xSRR(SRR0,SRR1); RESTORE_xSRR(CSRR0,CSRR1); RESTORE_xSRR(DSRR0,DSRR1); diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 4bf0aee858eb4a..72e4962902dead 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -95,8 +95,6 @@ _ENTRY(crit_dear) .space 4 _ENTRY(crit_esr) .space 4 -_ENTRY(saved_ksp_limit) - .space 4 /* * Exception prolog for critical exceptions. 
This is a little different diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 47857795f50a64..4a5f0c9b652b16 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -481,7 +481,6 @@ struct exception_regs { unsigned long csrr1; unsigned long dsrr0; unsigned long dsrr1; - unsigned long saved_ksp_limit; }; /* ensure this structure is always sized to a multiple of the stack alignment */ diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 717e658b90fd84..acc410043b9656 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -27,23 +27,14 @@ .text -/* - * We store the saved ksp_limit in the unused part - * of the STACK_FRAME_OVERHEAD - */ _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) - lwz r10,THREAD+KSP_LIMIT(r2) - stw r3, THREAD+KSP_LIMIT(r2) stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) mr r1,r3 - stw r10,8(r1) bl __do_softirq - lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) - stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr @@ -53,16 +44,11 @@ _GLOBAL(call_do_softirq) _GLOBAL(call_do_irq) mflr r0 stw r0,4(r1) - lwz r10,THREAD+KSP_LIMIT(r2) - stw r4, THREAD+KSP_LIMIT(r2) stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) mr r1,r4 - stw r10,8(r1) bl __do_irq - lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) - stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index afb334dfb6a89f..5b30df7b1b79bd 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1725,9 +1725,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; p->thread.ksp = sp; -#ifdef CONFIG_PPC32 - p->thread.ksp_limit = (unsigned long)end_of_stack(p); -#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT for (i = 0; i < nr_wp_slots(); i++) p->thread.ptrace_bps[i] = NULL; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index bb1387351b8f76..286b3a6b5c5e24 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1605,15 +1605,6 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception) bad_page_fault(regs, sig); } -DEFINE_INTERRUPT_HANDLER(StackOverflow) -{ - pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n", - current->comm, task_pid_nr(current), regs->gpr[1]); - debugger(regs); - show_regs(regs); - panic("kernel stack overflow"); -} - DEFINE_INTERRUPT_HANDLER(stack_overflow_exception) { die("Kernel stack overflow", regs, SIGSEGV); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c6aebc149d1412..739ea6dc461c3a 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -3086,15 +3086,6 @@ NOKPROBE_SYMBOL(analyse_instr); */ static nokprobe_inline int handle_stack_update(unsigned long ea, struct pt_regs *regs) { -#ifdef CONFIG_PPC32 - /* - * Check if we will touch kernel stack overflow - */ - if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { - printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n"); - return -EINVAL; - } -#endif /* CONFIG_PPC32 */ /* * Check if we already set since that means we'll * lose the previous value. From 7aa8dd67f15731f659390018b5c9fd95f5975b3d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:22 +0000 Subject: [PATCH 049/302] powerpc/32: Always enable data translation in exception prolog If the code can use a stack in vm area, it can also use a stack in linear space. Simplify code by removing old non VMAP stack code on PPC32. 
That means the data translation is now re-enabled early in exception prolog in all cases, not only when using VMAP stacks. While we are touching EXCEPTION_PROLOG macros, remove the unused for_rtas parameter in EXCEPTION_PROLOG_1. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7cd6440c60a7e8f4f035b245c57720f51e225aae.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/processor.h | 4 +- arch/powerpc/kernel/asm-offsets.c | 2 - arch/powerpc/kernel/entry_32.S | 19 +++---- arch/powerpc/kernel/fpu.S | 2 - arch/powerpc/kernel/head_32.h | 85 +--------------------------- arch/powerpc/kernel/head_40x.S | 23 -------- arch/powerpc/kernel/head_8xx.S | 19 +------ arch/powerpc/kernel/head_book3s_32.S | 47 +-------------- arch/powerpc/kernel/idle_6xx.S | 12 +--- arch/powerpc/kernel/idle_e500.S | 4 +- arch/powerpc/kernel/vector.S | 2 - arch/powerpc/mm/book3s32/hash_low.S | 14 ----- 12 files changed, 17 insertions(+), 216 deletions(-) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 43cbd9281055a3..eae16facc390eb 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -147,11 +147,9 @@ struct thread_struct { #ifdef CONFIG_PPC_RTAS unsigned long rtas_sp; /* stack pointer for when in RTAS */ #endif -#endif #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) unsigned long kuap; /* opened segments for user access */ #endif -#ifdef CONFIG_VMAP_STACK unsigned long srr0; unsigned long srr1; unsigned long dar; @@ -160,7 +158,7 @@ struct thread_struct { unsigned long r0, r3, r4, r5, r6, r8, r9, r11; unsigned long lr, ctr; #endif -#endif +#endif /* CONFIG_PPC32 */ /* Debug Registers */ struct debug_reg debug; #ifdef CONFIG_PPC_FPU_REGS diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 73620536c80101..85ba2b0bc8d870 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -131,7 +131,6 @@ int main(void) OFFSET(KSP_VSID, thread_struct, ksp_vsid); #else /* CONFIG_PPC64 */ OFFSET(PGDIR, thread_struct, pgdir); -#ifdef CONFIG_VMAP_STACK OFFSET(SRR0, thread_struct, srr0); OFFSET(SRR1, thread_struct, srr1); OFFSET(DAR, thread_struct, dar); @@ -148,7 +147,6 @@ int main(void) OFFSET(THLR, thread_struct, lr); OFFSET(THCTR, thread_struct, ctr); #endif -#endif #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); OFFSET(THREAD_ACC, thread_struct, acc); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 66198e6e25e7a0..33e97032ca25c0 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -129,7 +129,7 @@ transfer_to_handler: stw r12,_CTR(r11) stw r2,_XER(r11) mfspr r12,SPRN_SPRG_THREAD - tovirt_vmstack r12, r12 + tovirt(r12, r12) beq 2f /* if from user, fix up THREAD.regs */ addi r2, r12, -THREAD addi r11,r1,STACK_FRAME_OVERHEAD @@ -153,8 +153,7 @@ transfer_to_handler: transfer_to_handler_cont: 3: mflr r9 - tovirt_novmstack r2, r2 /* set r2 to current */ - tovirt_vmstack r9, r9 + tovirt(r9, r9) lwz r11,0(r9) /* virtual address of handler */ lwz r9,4(r9) /* where to go when done */ #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) @@ -933,7 +932,6 @@ _GLOBAL(enter_rtas) lis r6,1f@ha /* physical return address for rtas */ addi r6,r6,1f@l tophys(r6,r6) - tophys_novmstack r7, r1 lwz r8,RTASENTRY(r4) lwz r4,RTASBASE(r4) mfmsr r9 @@ -942,22 +940,19 @@ _GLOBAL(enter_rtas) mtmsr r0 /* disable interrupts so SRR0/1 don't get 
trashed */ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) mtlr r6 - stw r7, THREAD + RTAS_SP(r2) + stw r1, THREAD + RTAS_SP(r2) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 rfi -1: tophys_novmstack r9, r1 -#ifdef CONFIG_VMAP_STACK +1: li r0, MSR_KERNEL & ~MSR_IR /* can take DTLB miss */ mtmsr r0 isync -#endif - lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */ - lwz r9,8(r9) /* original msr value */ + lwz r8,INT_FRAME_SIZE+4(r1) /* get return address */ + lwz r9,8(r1) /* original msr value */ addi r1,r1,INT_FRAME_SIZE li r0,0 - tophys_novmstack r7, r2 - stw r0, THREAD + RTAS_SP(r7) + stw r0, THREAD + RTAS_SP(r2) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 rfi /* return to caller */ diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 3ff9a8fafa467c..2c57ece6671c49 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -92,9 +92,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* enable use of FP after return */ #ifdef CONFIG_PPC32 mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ -#ifdef CONFIG_VMAP_STACK tovirt(r5, r5) -#endif lwz r4,THREAD_FPEXC_MODE(r5) ori r9,r9,MSR_FP /* enable FP for current */ or r9,r9,r4 diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 25ee6b26ef5a8d..1b707755c68e1e 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -19,7 +19,6 @@ .macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0 mtspr SPRN_SPRG_SCRATCH0,r10 mtspr SPRN_SPRG_SCRATCH1,r11 -#ifdef CONFIG_VMAP_STACK mfspr r10, SPRN_SPRG_THREAD .if \handle_dar_dsisr #ifdef CONFIG_40x @@ -37,17 +36,13 @@ .endif mfspr r11, SPRN_SRR0 stw r11, SRR0(r10) -#endif mfspr r11, SPRN_SRR1 /* check whether user or kernel */ -#ifdef CONFIG_VMAP_STACK stw r11, SRR1(r10) -#endif mfcr r10 andi. r11, r11, MSR_PR .endm -.macro EXCEPTION_PROLOG_1 for_rtas=0 -#ifdef CONFIG_VMAP_STACK +.macro EXCEPTION_PROLOG_1 mtspr SPRN_SPRG_SCRATCH2,r1 subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f @@ -55,20 +50,13 @@ lwz r1,TASK_STACK-THREAD(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE 1: +#ifdef CONFIG_VMAP_STACK mtcrf 0x3f, r1 bt 32 - THREAD_ALIGN_SHIFT, stack_overflow -#else - subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ - beq 1f - mfspr r11,SPRN_SPRG_THREAD - lwz r11,TASK_STACK-THREAD(r11) - addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE -1: tophys(r11, r11) #endif .endm .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 -#ifdef CONFIG_VMAP_STACK LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take DTLB miss */ mtmsr r11 isync @@ -76,11 +64,6 @@ stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 -#else - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt(r1, r11) /* set new kernel sp */ -#endif stw r10,_CCR(r11) /* save registers */ stw r12,GPR12(r11) stw r9,GPR9(r11) @@ -90,7 +73,6 @@ stw r12,GPR11(r11) mflr r10 stw r10,_LINK(r11) -#ifdef CONFIG_VMAP_STACK mfspr r12, SPRN_SPRG_THREAD tovirt(r12, r12) .if \handle_dar_dsisr @@ -101,20 +83,12 @@ .endif lwz r9, SRR1(r12) lwz r12, SRR0(r12) -#else - mfspr r12,SPRN_SRR0 - mfspr r9,SPRN_SRR1 -#endif #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ #elif defined(CONFIG_PPC_8xx) mtspr SPRN_EID, r2 /* Set MSR_RI */ #else -#ifdef CONFIG_VMAP_STACK li r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */ -#else - li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */ -#endif mtmsr r10 /* (except for mach check in rtas) */ #endif stw r0,GPR0(r11) @@ -166,59 +140,6 @@ b transfer_to_syscall /* jump to handler */ .endm -.macro save_dar_dsisr_on_stack reg1, reg2, sp -#ifndef CONFIG_VMAP_STACK -#ifdef CONFIG_40x - mfspr \reg1, SPRN_DEAR - mfspr \reg2, SPRN_ESR -#else - mfspr \reg1, SPRN_DAR - mfspr \reg2, SPRN_DSISR -#endif - stw \reg1, _DAR(\sp) - stw \reg2, _DSISR(\sp) -#endif -.endm - -.macro get_and_save_dar_dsisr_on_stack reg1, reg2, sp -#ifdef CONFIG_VMAP_STACK - lwz \reg1, _DAR(\sp) - lwz \reg2, _DSISR(\sp) -#else - save_dar_dsisr_on_stack \reg1, \reg2, \sp -#endif -.endm - -.macro tovirt_vmstack dst, src -#ifdef CONFIG_VMAP_STACK - tovirt(\dst, \src) -#else - .ifnc \dst, \src - mr \dst, \src - .endif -#endif -.endm - -.macro tovirt_novmstack dst, src -#ifndef CONFIG_VMAP_STACK - tovirt(\dst, \src) -#else - .ifnc \dst, \src - mr \dst, \src - .endif -#endif -.endm - -.macro tophys_novmstack dst, src -#ifndef CONFIG_VMAP_STACK - tophys(\dst, \src) -#else - .ifnc \dst, \src - mr \dst, \src - .endif -#endif -.endm - /* * Note: code which follows this uses cr0.eq (set if from kernel), * r11, r12 (SRR0), and r9 (SRR1). @@ -266,7 +187,6 @@ ret_from_except) .macro vmap_stack_overflow_exception -#ifdef CONFIG_VMAP_STACK #ifdef CONFIG_SMP mfspr r1, SPRN_SPRG_THREAD lwz r1, TASK_CPU - THREAD(r1) @@ -285,7 +205,6 @@ SAVE_NVGPRS(r11) addi r3, r1, STACK_FRAME_OVERHEAD EXC_XFER_STD(0, stack_overflow_exception) -#endif .endm #endif /* __HEAD_32_H__ */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 72e4962902dead..7da673ec63efe6 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -111,12 +111,10 @@ _ENTRY(crit_esr) mfspr r11,SPRN_SRR1 stw r10,crit_srr0@l(0) stw r11,crit_srr1@l(0) -#ifdef CONFIG_VMAP_STACK mfspr r10,SPRN_DEAR mfspr r11,SPRN_ESR stw r10,crit_dear@l(0) stw r11,crit_esr@l(0) -#endif mfcr r10 /* save CR in r10 for now */ mfspr r11,SPRN_SRR3 /* check whether user or kernel */ andi. 
r11,r11,MSR_PR @@ -126,7 +124,6 @@ _ENTRY(crit_esr) /* COMING FROM USER MODE */ mfspr r11,SPRN_SPRG_THREAD /* if from user, start at top of */ lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */ -#ifdef CONFIG_VMAP_STACK 1: stw r1,crit_r1@l(0) addi r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ LOAD_REG_IMMEDIATE(r11,MSR_KERNEL & ~(MSR_IR | MSR_RI)) @@ -136,35 +133,18 @@ _ENTRY(crit_esr) stw r11,GPR1(r1) stw r11,0(r1) mr r11,r1 -#else -1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ - tophys(r11,r11) - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt(r1,r11) -#endif stw r10,_CCR(r11) /* save various registers */ stw r12,GPR12(r11) stw r9,GPR9(r11) mflr r10 stw r10,_LINK(r11) -#ifdef CONFIG_VMAP_STACK lis r9,PAGE_OFFSET@ha lwz r10,crit_r10@l(r9) lwz r12,crit_r11@l(r9) -#else - lwz r10,crit_r10@l(0) - lwz r12,crit_r11@l(0) -#endif stw r10,GPR10(r11) stw r12,GPR11(r11) -#ifdef CONFIG_VMAP_STACK lwz r12,crit_dear@l(r9) lwz r9,crit_esr@l(r9) -#else - mfspr r12,SPRN_DEAR /* save DEAR and ESR in the frame */ - mfspr r9,SPRN_ESR /* in them at the point where the */ -#endif stw r12,_DEAR(r11) /* since they may have had stuff */ stw r9,_ESR(r11) /* exception was taken */ mfspr r12,SPRN_SRR2 @@ -220,7 +200,6 @@ _ENTRY(crit_esr) */ START_EXCEPTION(0x0300, DataStorage) EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 EXC_XFER_LITE(0x300, handle_page_fault) /* @@ -240,14 +219,12 @@ _ENTRY(crit_esr) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x700, program_check_exception) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 46dff3f9c31f16..792e2fd864797b 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -124,7 +124,6 @@ instruction_counter: . = 0x200 MachineCheck: EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 li r6, RPN_PATTERN mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -137,7 +136,6 @@ MachineCheck: . = 0x600 Alignment: EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 li r6, RPN_PATTERN mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -333,21 +331,16 @@ DataTLBError: cmpwi cr1, r11, RPN_PATTERN beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ -#ifdef CONFIG_VMAP_STACK li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */ -#endif EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 handle_dar_dsisr=1 - get_and_save_dar_dsisr_on_stack r4, r5, r11 + lwz r4, _DAR(r11) + lwz r5, _DSISR(r11) andis. r10,r5,DSISR_NOHPTE@h beq+ .Ldtlbie tlbie r4 .Ldtlbie: -#ifndef CONFIG_VMAP_STACK - li r10,RPN_PATTERN - mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ -#endif /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) @@ -364,10 +357,6 @@ do_databreakpoint: addi r3,r1,STACK_FRAME_OVERHEAD mfspr r4,SPRN_BAR stw r4,_DAR(r11) -#ifndef CONFIG_VMAP_STACK - mfspr r5,SPRN_DSISR - stw r5,_DSISR(r11) -#endif EXC_XFER_STD(0x1c00, do_break) . 
= 0x1c00 @@ -510,14 +499,10 @@ FixupDAR:/* Entry point for dcbx workaround. */ 152: mfdar r11 mtctr r11 /* restore ctr reg from DAR */ -#ifdef CONFIG_VMAP_STACK mfspr r11, SPRN_SPRG_THREAD stw r10, DAR(r11) mfspr r10, SPRN_DSISR stw r10, DSISR(r11) -#else - mtdar r10 /* save fault EA to DAR */ -#endif mfspr r10,SPRN_M_TW b DARFixed /* Go back to normal TLB handling */ diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 565e84e20a7214..1cf7bd5d5ec10b 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -260,21 +260,14 @@ __secondary_hold_acknowledge: MachineCheck: EXCEPTION_PROLOG_0 #ifdef CONFIG_PPC_CHRP -#ifdef CONFIG_VMAP_STACK mtspr SPRN_SPRG_SCRATCH2,r1 mfspr r1, SPRN_SPRG_THREAD lwz r1, RTAS_SP(r1) cmpwi cr1, r1, 0 bne cr1, 7f mfspr r1, SPRN_SPRG_SCRATCH2 -#else - mfspr r11, SPRN_SPRG_THREAD - lwz r11, RTAS_SP(r11) - cmpwi cr1, r11, 0 - bne cr1, 7f -#endif #endif /* CONFIG_PPC_CHRP */ - EXCEPTION_PROLOG_1 for_rtas=1 + EXCEPTION_PROLOG_1 7: EXCEPTION_PROLOG_2 addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP @@ -288,7 +281,6 @@ MachineCheck: . = 0x300 DO_KVM 0x300 DataAccess: -#ifdef CONFIG_VMAP_STACK #ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION mtspr SPRN_SPRG_SCRATCH2,r10 @@ -310,29 +302,11 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) 1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 b handle_page_fault_tramp_1 -#else /* CONFIG_VMAP_STACK */ - EXCEPTION_PROLOG handle_dar_dsisr=1 - get_and_save_dar_dsisr_on_stack r4, r5, r11 -#ifdef CONFIG_PPC_BOOK3S_604 -BEGIN_MMU_FTR_SECTION - andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h - bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ - rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ - bl hash_page - b handle_page_fault_tramp_1 -MMU_FTR_SECTION_ELSE -#endif - b handle_page_fault_tramp_2 -#ifdef CONFIG_PPC_BOOK3S_604 -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) -#endif -#endif /* CONFIG_VMAP_STACK */ /* Instruction access exception. */ . = 0x400 DO_KVM 0x400 InstructionAccess: -#ifdef CONFIG_VMAP_STACK mtspr SPRN_SPRG_SCRATCH0,r10 mtspr SPRN_SPRG_SCRATCH1,r11 mfspr r10, SPRN_SPRG_THREAD @@ -353,18 +327,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 -#else /* CONFIG_VMAP_STACK */ - EXCEPTION_PROLOG - andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */ - beq 1f /* if so, try to put a PTE */ - li r3,0 /* into the hash table */ - mr r4,r12 /* SRR0 is fault address */ -#ifdef CONFIG_PPC_BOOK3S_604 -BEGIN_MMU_FTR_SECTION - bl hash_page -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) -#endif -#endif /* CONFIG_VMAP_STACK */ andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ stw r5, _DSISR(r11) stw r12, _DAR(r11) @@ -378,7 +340,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) DO_KVM 0x600 Alignment: EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 addi r3,r1,STACK_FRAME_OVERHEAD b alignment_exception_tramp @@ -689,18 +650,13 @@ alignment_exception_tramp: EXC_XFER_STD(0x600, alignment_exception) handle_page_fault_tramp_1: -#ifdef CONFIG_VMAP_STACK EXCEPTION_PROLOG_2 handle_dar_dsisr=1 -#endif lwz r5, _DSISR(r11) - /* fall through */ -handle_page_fault_tramp_2: andis. 
r0, r5, DSISR_DABRMATCH@h bne- 1f EXC_XFER_LITE(0x300, handle_page_fault) 1: EXC_XFER_STD(0x300, do_break) -#ifdef CONFIG_VMAP_STACK #ifdef CONFIG_PPC_BOOK3S_604 .macro save_regs_thread thread stw r0, THR0(\thread) @@ -775,6 +731,7 @@ fast_hash_page_return: rfi #endif /* CONFIG_PPC_BOOK3S_604 */ +#ifdef CONFIG_VMAP_STACK stack_overflow: vmap_stack_overflow_exception #endif diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 69df840f72535f..153366e178c4b5 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -145,9 +145,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* * Return from NAP/DOZE mode, restore some CPU specific registers, - * we are called with DR/IR still off and r2 containing physical - * address of current. R11 points to the exception frame (physical - * address). We have to preserve r10. + * R11 points to the exception frame. We have to preserve r10. */ _GLOBAL(power_save_ppc32_restore) lwz r9,_LINK(r11) /* interrupted in ppc6xx_idle: */ @@ -166,11 +164,7 @@ BEGIN_FTR_SECTION mfspr r9,SPRN_HID0 andis. r9,r9,HID0_NAP@h beq 1f -#ifdef CONFIG_VMAP_STACK addis r9, r11, nap_save_msscr0@ha -#else - addis r9,r11,(nap_save_msscr0-KERNELBASE)@ha -#endif lwz r9,nap_save_msscr0@l(r9) mtspr SPRN_MSSCR0, r9 sync @@ -178,11 +172,7 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) BEGIN_FTR_SECTION -#ifdef CONFIG_VMAP_STACK addis r9, r11, nap_save_hid1@ha -#else - addis r9,r11,(nap_save_hid1-KERNELBASE)@ha -#endif lwz r9,nap_save_hid1@l(r9) mtspr SPRN_HID1, r9 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 72c85b6f3898b4..7795727e7f08e2 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -74,8 +74,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) /* * Return from NAP/DOZE mode, restore some CPU specific registers, - * r2 containing physical address of current. - * r11 points to the exception frame (physical address). + * r2 containing address of current. + * r11 points to the exception frame. * We have to preserve r10. 
*/ _GLOBAL(power_save_ppc32_restore) diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 801dc28fdcca58..f5a52f444e3604 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -67,9 +67,7 @@ _GLOBAL(load_up_altivec) #ifdef CONFIG_PPC32 mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ oris r9,r9,MSR_VEC@h -#ifdef CONFIG_VMAP_STACK tovirt(r5, r5) -#endif #else ld r4,PACACURRENT(r13) addi r5,r4,THREAD /* Get THREAD */ diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 0e6dc830c38bf6..fb4233a5bdf7d7 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -140,10 +140,6 @@ _GLOBAL(hash_page) bne- .Lretry /* retry if someone got there first */ mfsrin r3,r4 /* get segment reg for segment */ -#ifndef CONFIG_VMAP_STACK - mfctr r0 - stw r0,_CTR(r11) -#endif bl create_hpte /* add the hash table entry */ #ifdef CONFIG_SMP @@ -152,17 +148,7 @@ _GLOBAL(hash_page) li r0,0 stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) #endif - -#ifdef CONFIG_VMAP_STACK b fast_hash_page_return -#else - /* Return from the exception */ - lwz r5,_CTR(r11) - mtctr r5 - lwz r0,GPR0(r11) - lwz r8,GPR8(r11) - b fast_exception_return -#endif #ifdef CONFIG_SMP .Lhash_page_out: From 5b1c9a0d7f3bcac591767fa1aad1323564673b26 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:23 +0000 Subject: [PATCH 050/302] powerpc/32: Tag DAR in EXCEPTION_PROLOG_2 for the 8xx 8xx requires to tag the DAR with a magic value in order to fixup DAR on faults generated by 'dcbX', as the 8xx forgets to update the DAR for those faults. Do the tagging as early as possible, that is before enabling MMU. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/853a2e28ca7c5fc85617037030f99fe6070c9536.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 6 ++++++ arch/powerpc/kernel/head_8xx.S | 18 ++++++------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 1b707755c68e1e..910f86642eecdb 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -57,6 +57,12 @@ .endm .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 +#ifdef CONFIG_PPC_8xx + .if \handle_dar_dsisr + li r11, RPN_PATTERN + mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */ + .endif +#endif LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take DTLB miss */ mtmsr r11 isync diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 792e2fd864797b..cdbfa9d413530c 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -30,6 +30,12 @@ #include #include +/* + * Value for the bits that have fixed value in RPN entries. + * Also used for tagging DAR for DTLBerror. + */ +#define RPN_PATTERN 0x00f0 + #include "head_32.h" .macro compare_to_kernel_boundary scratch, addr @@ -42,12 +48,6 @@ #endif .endm -/* - * Value for the bits that have fixed value in RPN entries. - * Also used for tagging DAR for DTLBerror. - */ -#define RPN_PATTERN 0x00f0 - #define PAGE_SHIFT_512K 19 #define PAGE_SHIFT_8M 23 @@ -124,8 +124,6 @@ instruction_counter: . = 0x200 MachineCheck: EXCEPTION_PROLOG handle_dar_dsisr=1 - li r6, RPN_PATTERN - mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */ addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x200, machine_check_exception) @@ -136,8 +134,6 @@ MachineCheck: . 
= 0x600 Alignment: EXCEPTION_PROLOG handle_dar_dsisr=1 - li r6, RPN_PATTERN - mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */ addi r3,r1,STACK_FRAME_OVERHEAD b .Lalignment_exception_ool @@ -331,8 +327,6 @@ DataTLBError: cmpwi cr1, r11, RPN_PATTERN beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ - li r11, RPN_PATTERN - mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */ EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 handle_dar_dsisr=1 lwz r4, _DAR(r11) From 9b6150fb8942d92e0991b9a4b02fa2e6f6b03238 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:24 +0000 Subject: [PATCH 051/302] powerpc/32: Enable instruction translation at the same time as data translation On 40x and 8xx, kernel text is pinned. On book3s/32, kernel text is mapped by BATs. Enable instruction translation at the same time as data translation, it makes things simpler. In syscall handler, MSR_RI can also be set at the same time because srr0/srr1 are already saved and r1 is set properly. On booke, translation is always on, so at the end all PPC32 have translation on early. Just update msr. Also update comment in power_save_ppc32_restore(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5269c7e5f5d2117358af3a89744d75a116be27b0.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 30 ++++++++++++------------------ arch/powerpc/kernel/head_32.h | 13 ++++++++----- arch/powerpc/kernel/head_40x.S | 10 +++++++--- arch/powerpc/kernel/head_booke.h | 6 ++++-- 4 files changed, 31 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 33e97032ca25c0..01a064c8a96aaa 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -153,19 +153,11 @@ transfer_to_handler: transfer_to_handler_cont: 3: mflr r9 - tovirt(r9, r9) lwz r11,0(r9) /* virtual address of handler */ lwz r9,4(r9) /* where to go when done */ -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r10 + mtctr r11 mtlr r9 - rfi /* jump to handler, enable MMU */ -#ifdef CONFIG_40x - b . 
/* Prevent prefetch past rfi */ -#endif + bctr /* jump to handler */ #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING @@ -444,8 +436,6 @@ fee_restarts: li r10,-1 stw r10,_TRAP(r11) addi r3,r1,STACK_FRAME_OVERHEAD - lis r10,MSR_KERNEL@h - ori r10,r10,MSR_KERNEL@l bl transfer_to_handler_full .long unrecoverable_exception .long ret_from_except @@ -945,16 +935,20 @@ _GLOBAL(enter_rtas) mtspr SPRN_SRR1,r9 rfi 1: - li r0, MSR_KERNEL & ~MSR_IR /* can take DTLB miss */ - mtmsr r0 - isync + lis r8, 1f@h + ori r8, r8, 1f@l + LOAD_REG_IMMEDIATE(r9,MSR_KERNEL) + mtspr SPRN_SRR0,r8 + mtspr SPRN_SRR1,r9 + rfi /* Reactivate MMU translation */ +1: lwz r8,INT_FRAME_SIZE+4(r1) /* get return address */ lwz r9,8(r1) /* original msr value */ addi r1,r1,INT_FRAME_SIZE li r0,0 stw r0, THREAD + RTAS_SP(r2) - mtspr SPRN_SRR0,r8 - mtspr SPRN_SRR1,r9 - rfi /* return to caller */ + mtlr r8 + mtmsr r9 + blr /* return to caller */ _ASM_NOKPROBE_SYMBOL(enter_rtas) #endif /* CONFIG_PPC_RTAS */ diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 910f86642eecdb..88b02bd91e8ed6 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -63,10 +63,14 @@ mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */ .endif #endif - LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take DTLB miss */ - mtmsr r11 - isync + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~MSR_RI) /* re-enable MMU */ + mtspr SPRN_SRR1, r11 + lis r11, 1f@h + ori r11, r11, 1f@l + mtspr SPRN_SRR0, r11 mfspr r11, SPRN_SPRG_SCRATCH2 + rfi +1: stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 @@ -94,7 +98,7 @@ #elif defined(CONFIG_PPC_8xx) mtspr SPRN_EID, r2 /* Set MSR_RI */ #else - li r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */ + li r10, MSR_KERNEL /* can take exceptions */ mtmsr r10 /* (except for mach check in rtas) */ #endif stw r0,GPR0(r11) @@ -179,7 +183,6 @@ #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ li r10,trap; \ stw r10,_TRAP(r11); \ - LOAD_REG_IMMEDIATE(r10, msr); \ bl tfer; \ .long hdlr; \ .long ret diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 7da673ec63efe6..55fa99c5085c64 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -126,9 +126,13 @@ _ENTRY(crit_esr) lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */ 1: stw r1,crit_r1@l(0) addi r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ - LOAD_REG_IMMEDIATE(r11,MSR_KERNEL & ~(MSR_IR | MSR_RI)) - mtmsr r11 - isync + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)) /* re-enable MMU */ + mtspr SPRN_SRR1, r11 + lis r11, 1f@h + ori r11, r11, 1f@l + mtspr SPRN_SRR0, r11 + rfi +1: lwz r11,crit_r1@l(0) stw r11,GPR1(r1) stw r11,0(r1) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 4a5f0c9b652b16..f712b9bc6d6207 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -53,6 +53,8 @@ END_BTB_FLUSH_SECTION mfspr r11, SPRN_SRR1; \ DO_KVM BOOKE_INTERRUPT_##intno SPRN_SRR1; \ andi. r11, r11, MSR_PR; /* check whether user or kernel */\ + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL); \ + mtmsr r11; \ mr r11, r1; \ beq 1f; \ BOOKE_CLEAR_BTB(r11) \ @@ -192,6 +194,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \ BOOKE_CLEAR_BTB(r10) \ andi. 
r11,r11,MSR_PR; \ + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)); \ + mtmsr r11; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\ addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ @@ -282,8 +286,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ li r10,trap; \ stw r10,_TRAP(r11); \ - lis r10,msr@h; \ - ori r10,r10,msr@l; \ bl tfer; \ .long hdlr; \ .long ret From a4719f5bb6d7dc220bffdc1b9f5ce5eaa5543581 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:25 +0000 Subject: [PATCH 052/302] powerpc/32: Statically initialise first emergency context The check of the emergency context initialisation in vmap_stack_overflow is buggy for the SMP case, as it compares r1 with 0 while in the SMP case r1 is offseted by the CPU id. Instead of fixing it, just perform static initialisation of the first emergency context. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4a67ba422be75713286dca0c86ee0d3df2eb6dfa.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 6 +----- arch/powerpc/kernel/setup_32.c | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 88b02bd91e8ed6..15c6fc7cbbf52a 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -205,11 +205,7 @@ lis r1, emergency_ctx@ha #endif lwz r1, emergency_ctx@l(r1) - cmpwi cr1, r1, 0 - bne cr1, 1f - lis r1, init_thread_union@ha - addi r1, r1, init_thread_union@l -1: addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE + addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE EXCEPTION_PROLOG_2 SAVE_NVGPRS(r11) addi r3, r1, STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 8ba49a6bf5159e..d7c1f92152af67 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -164,7 +164,7 @@ void __init irqstack_early_init(void) } #ifdef CONFIG_VMAP_STACK -void *emergency_ctx[NR_CPUS] __ro_after_init; +void *emergency_ctx[NR_CPUS] __ro_after_init = {[0] = &init_stack}; void __init emergency_stack_init(void) { From 5b5e5bc53def654c2dc437dd327f7a47c48d81d3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:27 +0000 Subject: [PATCH 053/302] powerpc/32: Add vmap_stack_overflow label inside the macro For consistency, add in the macro the label used by exception prolog to branch to stack overflow processing. While at it, enclose the macro in #ifdef CONFIG_VMAP_STACK on the 8xx as already done on book3s/32. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cf80056f5b946572ad98aea9d915dd25b23beda6.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 3 ++- arch/powerpc/kernel/head_8xx.S | 3 ++- arch/powerpc/kernel/head_book3s_32.S | 1 - 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 15c6fc7cbbf52a..d97ec94b34dab9 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -52,7 +52,7 @@ 1: #ifdef CONFIG_VMAP_STACK mtcrf 0x3f, r1 - bt 32 - THREAD_ALIGN_SHIFT, stack_overflow + bt 32 - THREAD_ALIGN_SHIFT, vmap_stack_overflow #endif .endm @@ -196,6 +196,7 @@ ret_from_except) .macro vmap_stack_overflow_exception +vmap_stack_overflow: #ifdef CONFIG_SMP mfspr r1, SPRN_SPRG_THREAD lwz r1, TASK_CPU - THREAD(r1) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index cdbfa9d413530c..b63445c55f4dd3 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -338,8 +338,9 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) -stack_overflow: +#ifdef CONFIG_VMAP_STACK vmap_stack_overflow_exception +#endif /* On the MPC8xx, these next four traps are used for development * support of breakpoints and such. Someday I will get around to diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 1cf7bd5d5ec10b..79a7715fadde49 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -732,7 +732,6 @@ fast_hash_page_return: #endif /* CONFIG_PPC_BOOK3S_604 */ #ifdef CONFIG_VMAP_STACK -stack_overflow: vmap_stack_overflow_exception #endif From 7bf1d7e1abab0d9f47ebce144deadb4409d0d631 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:28 +0000 Subject: [PATCH 054/302] powerpc/32: Use START_EXCEPTION() as much as possible Everywhere where it is possible, use START_EXCEPTION(). This will help for proper exception init in future patches. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d47c1cc242bbbef8658327503726abdaef9b63ef.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_40x.S | 12 +++++------ arch/powerpc/kernel/head_8xx.S | 27 +++++++++---------------- arch/powerpc/kernel/head_book3s_32.S | 30 ++++++++-------------------- 3 files changed, 22 insertions(+), 47 deletions(-) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 55fa99c5085c64..c14a71e0d6d37c 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -247,17 +247,15 @@ _ENTRY(crit_esr) EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ - . = 0x1000 + START_EXCEPTION(0x1000, DecrementerTrap) b Decrementer -/* 0x1010 - Fixed Interval Timer (FIT) Exception -*/ - . = 0x1010 +/* 0x1010 - Fixed Interval Timer (FIT) Exception */ + START_EXCEPTION(0x1010, FITExceptionTrap) b FITException -/* 0x1020 - Watchdog Timer (WDT) Exception -*/ - . 
= 0x1020 +/* 0x1020 - Watchdog Timer (WDT) Exception */ + START_EXCEPTION(0x1020, WDTExceptionTrap) b WDTException /* 0x1100 - Data TLB Miss Exception diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index b63445c55f4dd3..11789a077d769f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -121,8 +121,7 @@ instruction_counter: EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD) /* Machine check */ - . = 0x200 -MachineCheck: + START_EXCEPTION(0x200, MachineCheck) EXCEPTION_PROLOG handle_dar_dsisr=1 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x200, machine_check_exception) @@ -131,8 +130,7 @@ MachineCheck: EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) /* Alignment exception */ - . = 0x600 -Alignment: + START_EXCEPTION(0x600, Alignment) EXCEPTION_PROLOG handle_dar_dsisr=1 addi r3,r1,STACK_FRAME_OVERHEAD b .Lalignment_exception_ool @@ -149,8 +147,7 @@ Alignment: EXC_XFER_STD(0x600, alignment_exception) /* System call */ - . = 0xc00 -SystemCall: + START_EXCEPTION(0xc00, SystemCall) SYSCALL_ENTRY 0xc00 /* Single step - not used on 601 */ @@ -161,7 +158,6 @@ SystemCall: */ EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD) - . = 0x1100 /* * For the MPC8xx, this is a software tablewalk to load the instruction * TLB. The task switch loads the M_TWB register with the pointer to the first @@ -183,7 +179,7 @@ SystemCall: #define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) #endif -InstructionTLBMiss: + START_EXCEPTION(0x1100, InstructionTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 @@ -239,8 +235,7 @@ InstructionTLBMiss: rfi #endif - . = 0x1200 -DataStoreTLBMiss: + START_EXCEPTION(0x1200, DataStoreTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 mfcr r11 @@ -303,8 +298,7 @@ DataStoreTLBMiss: * to many reasons, such as executing guarded memory or illegal instruction * addresses. There is nothing to do but handle a big time error fault. */ - . = 0x1300 -InstructionTLBError: + START_EXCEPTION(0x1300, InstructionTLBError) EXCEPTION_PROLOG andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ andis. r10,r9,SRR1_ISI_NOPT@h @@ -320,8 +314,7 @@ InstructionTLBError: * many reasons, including a dirty update to a pte. We bail out to * a higher level function that can handle it. */ - . = 0x1400 -DataTLBError: + START_EXCEPTION(0x1400, DataTLBError) EXCEPTION_PROLOG_0 handle_dar_dsisr=1 mfspr r11, SPRN_DAR cmpwi cr1, r11, RPN_PATTERN @@ -354,8 +347,7 @@ do_databreakpoint: stw r4,_DAR(r11) EXC_XFER_STD(0x1c00, do_break) - . = 0x1c00 -DataBreakpoint: + START_EXCEPTION(0x1c00, DataBreakpoint) EXCEPTION_PROLOG_0 handle_dar_dsisr=1 mfspr r11, SPRN_SRR0 cmplwi cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l @@ -368,8 +360,7 @@ DataBreakpoint: rfi #ifdef CONFIG_PERF_EVENTS - . = 0x1d00 -InstructionBreakpoint: + START_EXCEPTION(0x1d00, InstructionBreakpoint) mtspr SPRN_SPRG_SCRATCH0, r10 lwz r10, (instruction_counter - PAGE_OFFSET)@l(0) addi r10, r10, -1 diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 79a7715fadde49..17510c99802d7d 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -255,9 +255,7 @@ __secondary_hold_acknowledge: * pointer when we take an exception from supervisor mode.) * -- paulus. */ - . 
= 0x200 - DO_KVM 0x200 -MachineCheck: + START_EXCEPTION(0x200, MachineCheck) EXCEPTION_PROLOG_0 #ifdef CONFIG_PPC_CHRP mtspr SPRN_SPRG_SCRATCH2,r1 @@ -278,9 +276,7 @@ MachineCheck: #endif /* Data access exception. */ - . = 0x300 - DO_KVM 0x300 -DataAccess: + START_EXCEPTION(0x300, DataAccess) #ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION mtspr SPRN_SPRG_SCRATCH2,r10 @@ -304,9 +300,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) b handle_page_fault_tramp_1 /* Instruction access exception. */ - . = 0x400 - DO_KVM 0x400 -InstructionAccess: + START_EXCEPTION(0x400, InstructionAccess) mtspr SPRN_SPRG_SCRATCH0,r10 mtspr SPRN_SPRG_SCRATCH1,r11 mfspr r10, SPRN_SPRG_THREAD @@ -336,9 +330,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) /* Alignment exception */ - . = 0x600 - DO_KVM 0x600 -Alignment: + START_EXCEPTION(0x600, Alignment) EXCEPTION_PROLOG handle_dar_dsisr=1 addi r3,r1,STACK_FRAME_OVERHEAD b alignment_exception_tramp @@ -347,9 +339,7 @@ Alignment: EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) /* Floating-point unavailable */ - . = 0x800 - DO_KVM 0x800 -FPUnavailable: + START_EXCEPTION(0x800, FPUnavailable) #ifdef CONFIG_PPC_FPU BEGIN_FTR_SECTION /* @@ -375,9 +365,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) /* System call */ - . = 0xc00 - DO_KVM 0xc00 -SystemCall: + START_EXCEPTION(0xc00, SystemCall) SYSCALL_ENTRY 0xc00 EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) @@ -391,12 +379,10 @@ SystemCall: * non-altivec kernel running on a machine with altivec just * by executing an altivec instruction. */ - . = 0xf00 - DO_KVM 0xf00 + START_EXCEPTION(0xf00, PerformanceMonitorTrap) b PerformanceMonitor - . = 0xf20 - DO_KVM 0xf20 + START_EXCEPTION(0xf20, AltiVecUnavailableTrap) b AltiVecUnavailable /* From dc13b889b586f499cc87eb2b0b7e901778b3b5cf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:29 +0000 Subject: [PATCH 055/302] powerpc/32: Move exception prolog code into .text once MMU is back on The space in the head section is rather constrained by the fact that exception vectors are spread every 0x100 bytes and sometimes we need to have "out of line" code because it doesn't fit. Now that we are enabling MMU early in the prolog, take that opportunity to jump somewhere else in the .text section where we don't have any space constraint. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/38b31ca4bc782a4985bc7952a675404d7ff27c24.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 5 ++++ arch/powerpc/kernel/head_40x.S | 6 +++++ arch/powerpc/kernel/head_8xx.S | 25 ++++++++------------ arch/powerpc/kernel/head_book3s_32.S | 34 ++++++++++++---------------- 4 files changed, 36 insertions(+), 34 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index d97ec94b34dab9..3c0aa453851421 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -70,6 +70,8 @@ mtspr SPRN_SRR0, r11 mfspr r11, SPRN_SPRG_SCRATCH2 rfi + + .text 1: stw r11,GPR1(r1) stw r11,0(r1) @@ -163,12 +165,14 @@ */ #ifdef CONFIG_PPC_BOOK3S #define START_EXCEPTION(n, label) \ + __HEAD; \ . = n; \ DO_KVM n; \ label: #else #define START_EXCEPTION(n, label) \ + __HEAD; \ . 
= n; \ label: @@ -196,6 +200,7 @@ ret_from_except) .macro vmap_stack_overflow_exception + __HEAD vmap_stack_overflow: #ifdef CONFIG_SMP mfspr r1, SPRN_SPRG_THREAD diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index c14a71e0d6d37c..e7d8856714d3ac 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -132,6 +132,8 @@ _ENTRY(crit_esr) ori r11, r11, 1f@l mtspr SPRN_SRR0, r11 rfi + + .text 1: lwz r11,crit_r1@l(0) stw r11,GPR1(r1) @@ -496,6 +498,7 @@ _ENTRY(crit_esr) crit_transfer_to_handler, ret_from_crit_exc) /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ + __HEAD Decrementer: EXCEPTION_PROLOG lis r0,TSR_PIS@h @@ -504,12 +507,14 @@ Decrementer: EXC_XFER_LITE(0x1000, timer_interrupt) /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ + __HEAD FITException: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD; EXC_XFER_STD(0x1010, unknown_exception) /* Watchdog Timer (WDT) Exception. (from 0x1020) */ + __HEAD WDTException: CRITICAL_EXCEPTION_PROLOG; addi r3,r1,STACK_FRAME_OVERHEAD; @@ -523,6 +528,7 @@ WDTException: * reserved. */ + __HEAD /* Damn, I came up one instruction too many to fit into the * exception space :-). Both the instruction and data TLB * miss get to this point to load the TLB. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 11789a077d769f..d16d0ec71bb2bd 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -133,7 +133,7 @@ instruction_counter: START_EXCEPTION(0x600, Alignment) EXCEPTION_PROLOG handle_dar_dsisr=1 addi r3,r1,STACK_FRAME_OVERHEAD - b .Lalignment_exception_ool + EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) @@ -141,11 +141,6 @@ instruction_counter: /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - /* With VMAP_STACK there's not enough room for this at 0x600 */ - . = 0xa00 -.Lalignment_exception_ool: - EXC_XFER_STD(0x600, alignment_exception) - /* System call */ START_EXCEPTION(0xc00, SystemCall) SYSCALL_ENTRY 0xc00 @@ -339,26 +334,25 @@ DARFixed:/* Return from dcbx instruction bug workaround */ * support of breakpoints and such. Someday I will get around to * using them. */ -do_databreakpoint: - EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 - addi r3,r1,STACK_FRAME_OVERHEAD - mfspr r4,SPRN_BAR - stw r4,_DAR(r11) - EXC_XFER_STD(0x1c00, do_break) - START_EXCEPTION(0x1c00, DataBreakpoint) EXCEPTION_PROLOG_0 handle_dar_dsisr=1 mfspr r11, SPRN_SRR0 cmplwi cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l cror 4*cr1+eq, 4*cr1+eq, 4*cr7+eq - bne cr1, do_databreakpoint + bne cr1, 1f mtcr r10 mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 rfi +1: EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 handle_dar_dsisr=1 + addi r3,r1,STACK_FRAME_OVERHEAD + mfspr r4,SPRN_BAR + stw r4,_DAR(r11) + EXC_XFER_STD(0x1c00, do_break) + #ifdef CONFIG_PERF_EVENTS START_EXCEPTION(0x1d00, InstructionBreakpoint) mtspr SPRN_SPRG_SCRATCH0, r10 @@ -376,6 +370,7 @@ do_databreakpoint: EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) + __HEAD . 
= 0x2000 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 17510c99802d7d..4a74bbe7462b9b 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -269,11 +269,10 @@ __secondary_hold_acknowledge: 7: EXCEPTION_PROLOG_2 addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP - beq cr1, machine_check_tramp + beq cr1, 1f twi 31, 0, 0 -#else - b machine_check_tramp #endif +1: EXC_XFER_STD(0x200, machine_check_exception) /* Data access exception. */ START_EXCEPTION(0x300, DataAccess) @@ -297,7 +296,13 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) #endif 1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 - b handle_page_fault_tramp_1 + EXCEPTION_PROLOG_2 handle_dar_dsisr=1 + lwz r5, _DSISR(r11) + andis. r0, r5, DSISR_DABRMATCH@h + bne- 1f + EXC_XFER_LITE(0x300, handle_page_fault) +1: EXC_XFER_STD(0x300, do_break) + /* Instruction access exception. */ START_EXCEPTION(0x400, InstructionAccess) @@ -333,7 +338,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) START_EXCEPTION(0x600, Alignment) EXCEPTION_PROLOG handle_dar_dsisr=1 addi r3,r1,STACK_FRAME_OVERHEAD - b alignment_exception_tramp + EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) @@ -385,6 +390,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) START_EXCEPTION(0xf20, AltiVecUnavailableTrap) b AltiVecUnavailable + __HEAD /* * Handle TLB miss for instruction on 603/603e. * Note: we get an alternate set of r0 - r3 to use automatically. @@ -627,22 +633,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD) EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD) + __HEAD . = 0x3000 -machine_check_tramp: - EXC_XFER_STD(0x200, machine_check_exception) - -alignment_exception_tramp: - EXC_XFER_STD(0x600, alignment_exception) - -handle_page_fault_tramp_1: - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 - lwz r5, _DSISR(r11) - andis. r0, r5, DSISR_DABRMATCH@h - bne- 1f - EXC_XFER_LITE(0x300, handle_page_fault) -1: EXC_XFER_STD(0x300, do_break) - #ifdef CONFIG_PPC_BOOK3S_604 .macro save_regs_thread thread stw r0, THR0(\thread) @@ -721,6 +714,7 @@ fast_hash_page_return: vmap_stack_overflow_exception #endif + __HEAD AltiVecUnavailable: EXCEPTION_PROLOG #ifdef CONFIG_ALTIVEC @@ -731,12 +725,14 @@ AltiVecUnavailable: 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_LITE(0xf20, altivec_unavailable_exception) + __HEAD PerformanceMonitor: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0xf00, performance_monitor_exception) + __HEAD /* * This code is jumped to from the startup code to copy * the kernel image to physical address PHYSICAL_START. From 8f844c06f460687b028c675c3fa68f8e735aeb8c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:30 +0000 Subject: [PATCH 056/302] powerpc/32: Provide a name to exception prolog continuation in virtual mode Now that the prolog continuation is separated in .text, give it a name and mark it _ASM_NOKPROBE_SYMBOL. 
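The per-exception names below rely on GNU assembler macro-argument splicing: \name substitutes the macro argument and \() marks where the argument ends so that a suffix such as _virt can be appended. A minimal sketch of the mechanism, with a generic macro name assumed purely for illustration:

        .macro  prolog  name
\name\()_virt:                          /* "prolog MachineCheck" emits MachineCheck_virt: */
        nop                             /* prolog continuation would go here */
        .endm

        prolog  MachineCheck            /* defines the MachineCheck_virt symbol */

The patch uses this same splicing both to create the label and to pass it to _ASM_NOKPROBE_SYMBOL().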
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d96374218815a6627e1e922ab2aba994050fb87a.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 12 +++++++----- arch/powerpc/kernel/head_40x.S | 22 ++++++++++++---------- arch/powerpc/kernel/head_8xx.S | 10 +++++----- arch/powerpc/kernel/head_book3s_32.S | 14 +++++++------- 4 files changed, 31 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 3c0aa453851421..160ebd573c3794 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -10,10 +10,10 @@ * We assume sprg3 has the physical address of the current * task's thread_struct. */ -.macro EXCEPTION_PROLOG handle_dar_dsisr=0 +.macro EXCEPTION_PROLOG name handle_dar_dsisr=0 EXCEPTION_PROLOG_0 handle_dar_dsisr=\handle_dar_dsisr EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 handle_dar_dsisr=\handle_dar_dsisr + EXCEPTION_PROLOG_2 \name handle_dar_dsisr=\handle_dar_dsisr .endm .macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0 @@ -56,7 +56,7 @@ #endif .endm -.macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 +.macro EXCEPTION_PROLOG_2 name handle_dar_dsisr=0 #ifdef CONFIG_PPC_8xx .if \handle_dar_dsisr li r11, RPN_PATTERN @@ -72,6 +72,7 @@ rfi .text +\name\()_virt: 1: stw r11,GPR1(r1) stw r11,0(r1) @@ -109,6 +110,7 @@ stw r10,8(r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) +_ASM_NOKPROBE_SYMBOL(\name\()_virt) .endm .macro SYSCALL_ENTRY trapno @@ -180,7 +182,7 @@ #define EXCEPTION(n, label, hdlr, xfer) \ START_EXCEPTION(n, label) \ - EXCEPTION_PROLOG; \ + EXCEPTION_PROLOG label; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ xfer(n, hdlr) @@ -212,7 +214,7 @@ #endif lwz r1, emergency_ctx@l(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE - EXCEPTION_PROLOG_2 + EXCEPTION_PROLOG_2 vmap_stack_overflow SAVE_NVGPRS(r11) addi r3, r1, STACK_FRAME_OVERHEAD EXC_XFER_STD(0, stack_overflow_exception) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index e7d8856714d3ac..86883ccb3dc57b 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -104,7 +104,7 @@ _ENTRY(crit_esr) * Instead we use a couple of words of memory at low physical addresses. * This is OK since we don't support SMP on these processors. */ -.macro CRITICAL_EXCEPTION_PROLOG +.macro CRITICAL_EXCEPTION_PROLOG name stw r10,crit_r10@l(0) /* save two registers to work with */ stw r11,crit_r11@l(0) mfspr r10,SPRN_SRR0 @@ -135,6 +135,7 @@ _ENTRY(crit_esr) .text 1: +\name\()_virt: lwz r11,crit_r1@l(0) stw r11,GPR1(r1) stw r11,0(r1) @@ -162,6 +163,7 @@ _ENTRY(crit_esr) stw r10, 8(r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) +_ASM_NOKPROBE_SYMBOL(\name\()_virt) .endm /* @@ -182,7 +184,7 @@ _ENTRY(crit_esr) */ #define CRITICAL_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label); \ - CRITICAL_EXCEPTION_PROLOG; \ + CRITICAL_EXCEPTION_PROLOG label; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ crit_transfer_to_handler, ret_from_crit_exc) @@ -205,7 +207,7 @@ _ENTRY(crit_esr) * if they can't resolve the lightweight TLB fault. */ START_EXCEPTION(0x0300, DataStorage) - EXCEPTION_PROLOG handle_dar_dsisr=1 + EXCEPTION_PROLOG DataStorage handle_dar_dsisr=1 EXC_XFER_LITE(0x300, handle_page_fault) /* @@ -213,7 +215,7 @@ _ENTRY(crit_esr) * This is caused by a fetch from non-execute or guarded pages. 
*/ START_EXCEPTION(0x0400, InstructionAccess) - EXCEPTION_PROLOG + EXCEPTION_PROLOG InstructionAccess li r5,0 stw r5, _ESR(r11) /* Zero ESR */ stw r12, _DEAR(r11) /* SRR0 as DEAR */ @@ -224,13 +226,13 @@ _ENTRY(crit_esr) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) - EXCEPTION_PROLOG handle_dar_dsisr=1 + EXCEPTION_PROLOG Alignment handle_dar_dsisr=1 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) - EXCEPTION_PROLOG handle_dar_dsisr=1 + EXCEPTION_PROLOG ProgramCheck handle_dar_dsisr=1 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x700, program_check_exception) @@ -450,7 +452,7 @@ _ENTRY(crit_esr) */ /* 0x2000 - Debug Exception */ START_EXCEPTION(0x2000, DebugTrap) - CRITICAL_EXCEPTION_PROLOG + CRITICAL_EXCEPTION_PROLOG DebugTrap /* * If this is a single step or branch-taken exception in an @@ -500,7 +502,7 @@ _ENTRY(crit_esr) /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ __HEAD Decrementer: - EXCEPTION_PROLOG + EXCEPTION_PROLOG Decrementer lis r0,TSR_PIS@h mtspr SPRN_TSR,r0 /* Clear the PIT exception */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -509,14 +511,14 @@ Decrementer: /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ __HEAD FITException: - EXCEPTION_PROLOG + EXCEPTION_PROLOG FITException addi r3,r1,STACK_FRAME_OVERHEAD; EXC_XFER_STD(0x1010, unknown_exception) /* Watchdog Timer (WDT) Exception. (from 0x1020) */ __HEAD WDTException: - CRITICAL_EXCEPTION_PROLOG; + CRITICAL_EXCEPTION_PROLOG WDTException addi r3,r1,STACK_FRAME_OVERHEAD; EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index d16d0ec71bb2bd..932702a38234dd 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -122,7 +122,7 @@ instruction_counter: /* Machine check */ START_EXCEPTION(0x200, MachineCheck) - EXCEPTION_PROLOG handle_dar_dsisr=1 + EXCEPTION_PROLOG MachineCheck handle_dar_dsisr=1 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x200, machine_check_exception) @@ -131,7 +131,7 @@ instruction_counter: /* Alignment exception */ START_EXCEPTION(0x600, Alignment) - EXCEPTION_PROLOG handle_dar_dsisr=1 + EXCEPTION_PROLOG Alignment handle_dar_dsisr=1 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x600, alignment_exception) @@ -294,7 +294,7 @@ instruction_counter: * addresses. There is nothing to do but handle a big time error fault. */ START_EXCEPTION(0x1300, InstructionTLBError) - EXCEPTION_PROLOG + EXCEPTION_PROLOG InstructionTLBError andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ andis. r10,r9,SRR1_ISI_NOPT@h beq+ .Litlbie @@ -316,7 +316,7 @@ instruction_counter: beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 + EXCEPTION_PROLOG_2 DataTLBError handle_dar_dsisr=1 lwz r4, _DAR(r11) lwz r5, _DSISR(r11) andis. 
r10,r5,DSISR_NOHPTE@h @@ -347,7 +347,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ rfi 1: EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 + EXCEPTION_PROLOG_2 DataBreakpoint handle_dar_dsisr=1 addi r3,r1,STACK_FRAME_OVERHEAD mfspr r4,SPRN_BAR stw r4,_DAR(r11) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 4a74bbe7462b9b..4ff67c5cae1ca3 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -266,7 +266,7 @@ __secondary_hold_acknowledge: mfspr r1, SPRN_SPRG_SCRATCH2 #endif /* CONFIG_PPC_CHRP */ EXCEPTION_PROLOG_1 -7: EXCEPTION_PROLOG_2 +7: EXCEPTION_PROLOG_2 MachineCheck addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP beq cr1, 1f @@ -296,7 +296,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) #endif 1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 + EXCEPTION_PROLOG_2 DataAccess handle_dar_dsisr=1 lwz r5, _DSISR(r11) andis. r0, r5, DSISR_DABRMATCH@h bne- 1f @@ -325,7 +325,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) andi. r11, r11, MSR_PR EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 + EXCEPTION_PROLOG_2 InstructionAccess andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ stw r5, _DSISR(r11) stw r12, _DAR(r11) @@ -336,7 +336,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) /* Alignment exception */ START_EXCEPTION(0x600, Alignment) - EXCEPTION_PROLOG handle_dar_dsisr=1 + EXCEPTION_PROLOG Alignment handle_dar_dsisr=1 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x600, alignment_exception) @@ -353,7 +353,7 @@ BEGIN_FTR_SECTION */ b ProgramCheck END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) - EXCEPTION_PROLOG + EXCEPTION_PROLOG FPUnavailable beq 1f bl load_up_fpu /* if from user, just load it up */ b fast_exception_return @@ -716,7 +716,7 @@ fast_hash_page_return: __HEAD AltiVecUnavailable: - EXCEPTION_PROLOG + EXCEPTION_PROLOG AltiVecUnavailable #ifdef CONFIG_ALTIVEC beq 1f bl load_up_altivec /* if from user, just load it up */ @@ -727,7 +727,7 @@ AltiVecUnavailable: __HEAD PerformanceMonitor: - EXCEPTION_PROLOG + EXCEPTION_PROLOG PerformanceMonitor addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0xf00, performance_monitor_exception) From 32d2ca0e969a3620f71dff166a95ebf3f735b72e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:31 +0000 Subject: [PATCH 057/302] powerpc/32: Refactor booke critical registers saving Refactor booke critical registers saving into a few macros and move it into the exception prolog directly. Keep the dedicated transfert_to_handler entry point for the moment allthough they are empty. They will be removed in a later patch to reduce churn. 
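One piece of background for the SAVE_xSRR helper introduced below: .S files are run through the C preprocessor, so ## token pasting can build both the SPR name and the stack-frame slot from a single macro argument. Assuming the usual SPRN_SRR0/SPRN_SRR1 and _SRR0/_SRR1 definitions, SAVE_xSRR(SRR) expands (modulo whitespace) to:

        mfspr   r0, SPRN_SRR0           /* read SRR0 */
        stw     r0, _SRR0(r1)           /* save it in the exception frame */
        mfspr   r0, SPRN_SRR1           /* read SRR1 */
        stw     r0, _SRR1(r1)           /* save it in the exception frame */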
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/269171496f1f5f22afa621695bded22976c9d48d.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 33 ------------------------- arch/powerpc/kernel/head_booke.h | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 01a064c8a96aaa..ad1fd33e1126b1 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -51,49 +51,16 @@ #ifdef CONFIG_BOOKE .globl mcheck_transfer_to_handler mcheck_transfer_to_handler: - mfspr r0,SPRN_DSRR0 - stw r0,_DSRR0(r11) - mfspr r0,SPRN_DSRR1 - stw r0,_DSRR1(r11) /* fall through */ _ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler) .globl debug_transfer_to_handler debug_transfer_to_handler: - mfspr r0,SPRN_CSRR0 - stw r0,_CSRR0(r11) - mfspr r0,SPRN_CSRR1 - stw r0,_CSRR1(r11) /* fall through */ _ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler) .globl crit_transfer_to_handler crit_transfer_to_handler: -#ifdef CONFIG_PPC_BOOK3E_MMU - mfspr r0,SPRN_MAS0 - stw r0,MAS0(r11) - mfspr r0,SPRN_MAS1 - stw r0,MAS1(r11) - mfspr r0,SPRN_MAS2 - stw r0,MAS2(r11) - mfspr r0,SPRN_MAS3 - stw r0,MAS3(r11) - mfspr r0,SPRN_MAS6 - stw r0,MAS6(r11) -#ifdef CONFIG_PHYS_64BIT - mfspr r0,SPRN_MAS7 - stw r0,MAS7(r11) -#endif /* CONFIG_PHYS_64BIT */ -#endif /* CONFIG_PPC_BOOK3E_MMU */ -#ifdef CONFIG_44x - mfspr r0,SPRN_MMUCR - stw r0,MMUCR(r11) -#endif - mfspr r0,SPRN_SRR0 - stw r0,_SRR0(r11) - mfspr r0,SPRN_SRR1 - stw r0,_SRR1(r11) - /* fall through */ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #endif diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index f712b9bc6d6207..a127d5e7efb4de 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -229,6 +229,36 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) +#define SAVE_xSRR(xSRR) \ + mfspr r0,SPRN_##xSRR##0; \ + stw r0,_##xSRR##0(r1); \ + mfspr r0,SPRN_##xSRR##1; \ + stw r0,_##xSRR##1(r1) + + +.macro SAVE_MMU_REGS +#ifdef CONFIG_PPC_BOOK3E_MMU + mfspr r0,SPRN_MAS0 + stw r0,MAS0(r1) + mfspr r0,SPRN_MAS1 + stw r0,MAS1(r1) + mfspr r0,SPRN_MAS2 + stw r0,MAS2(r1) + mfspr r0,SPRN_MAS3 + stw r0,MAS3(r1) + mfspr r0,SPRN_MAS6 + stw r0,MAS6(r1) +#ifdef CONFIG_PHYS_64BIT + mfspr r0,SPRN_MAS7 + stw r0,MAS7(r1) +#endif /* CONFIG_PHYS_64BIT */ +#endif /* CONFIG_PPC_BOOK3E_MMU */ +#ifdef CONFIG_44x + mfspr r0,SPRN_MMUCR + stw r0,MMUCR(r1) +#endif +.endm + #define CRITICAL_EXCEPTION_PROLOG(intno) \ EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1) #define DEBUG_EXCEPTION_PROLOG \ @@ -271,6 +301,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) START_EXCEPTION(label); \ CRITICAL_EXCEPTION_PROLOG(intno); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ crit_transfer_to_handler, ret_from_crit_exc) @@ -280,6 +312,10 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mfspr r5,SPRN_ESR; \ stw r5,_ESR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ + SAVE_xSRR(DSRR); \ + SAVE_xSRR(CSRR); \ + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ mcheck_transfer_to_handler, ret_from_mcheck_exc) @@ -363,6 +399,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) 2: mfspr r4,SPRN_DBSR; \ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ addi 
r3,r1,STACK_FRAME_OVERHEAD; \ + SAVE_xSRR(CSRR); \ + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc) #define DEBUG_CRIT_EXCEPTION \ @@ -417,6 +456,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) 2: mfspr r4,SPRN_DBSR; \ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ addi r3,r1,STACK_FRAME_OVERHEAD; \ + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) #define DATA_STORAGE_EXCEPTION \ From 0f2793e33db2e2f062968f2ca789b6826972b05b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:32 +0000 Subject: [PATCH 058/302] powerpc/32: Perform normal function call in exception entry Now that the MMU is re-enabled before calling the transfer function, we don't need anymore that hack with the address of the handler and the return function sitting just after the 'bl' to the transfer fonction, that function is retrieving via a read relative to 'lr'. Do a regular call to the transfer function, then to the handler, then branch to the return function. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/73c00f3361ca280ef8fd7814c291bd1f5b6e2081.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 14 ++++---------- arch/powerpc/kernel/head_32.h | 4 ++-- arch/powerpc/kernel/head_booke.h | 6 +++--- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ad1fd33e1126b1..fb849ef922fb39 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -104,7 +104,7 @@ transfer_to_handler: #ifdef CONFIG_PPC_BOOK3S_32 kuep_lock r11, r12 #endif - b 3f + blr /* if from kernel, check interrupted DOZE/NAP mode */ 2: @@ -118,13 +118,7 @@ transfer_to_handler: #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ .globl transfer_to_handler_cont transfer_to_handler_cont: -3: - mflr r9 - lwz r11,0(r9) /* virtual address of handler */ - lwz r9,4(r9) /* where to go when done */ - mtctr r11 - mtlr r9 - bctr /* jump to handler */ + blr #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING @@ -404,8 +398,8 @@ fee_restarts: stw r10,_TRAP(r11) addi r3,r1,STACK_FRAME_OVERHEAD bl transfer_to_handler_full - .long unrecoverable_exception - .long ret_from_except + bl unrecoverable_exception + b ret_from_except #endif .globl ret_from_except_full diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 160ebd573c3794..e09585b88ba7b9 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -190,8 +190,8 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) li r10,trap; \ stw r10,_TRAP(r11); \ bl tfer; \ - .long hdlr; \ - .long ret + bl hdlr; \ + b ret #define EXC_XFER_STD(n, hdlr) \ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index a127d5e7efb4de..3707f49f0b78cf 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -322,9 +322,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ li r10,trap; \ stw r10,_TRAP(r11); \ - bl tfer; \ - .long hdlr; \ - .long ret + bl tfer; \ + bl hdlr; \ + b ret; \ #define EXC_XFER_STD(n, hdlr) \ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, 
transfer_to_handler_full, \ From e9f99704aafcdbd90ba20b81db2dae8526d8b8e5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:33 +0000 Subject: [PATCH 059/302] powerpc/32: Always save non volatile registers on exception entry In preparation of handling exception entry and exit in C, in order to simplify the handling, always save non volatile registers when entering an exception. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3ce8ced87a4f1467fa36fcc50763d53b45e466c1.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ptrace.h | 6 ++---- arch/powerpc/kernel/entry_32.S | 13 +------------ arch/powerpc/kernel/head_32.h | 3 +-- arch/powerpc/kernel/head_booke.h | 2 +- 4 files changed, 5 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index bedbca062f0353..f10498e1b3f617 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -209,16 +209,14 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) */ #define TRAP_FLAGS_MASK 0x1F #define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) -#define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#define SET_FULL_REGS(regs) ((regs)->trap &= ~1) +#define FULL_REGS(regs) true +#define SET_FULL_REGS(regs) do { } while (0) #define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0) #define IS_MCHECK_EXC(regs) (((regs)->trap & 4) != 0) #define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0) #define NV_REG_POISON 0xdeadbeef #define CHECK_FULL_REGS(regs) \ do { \ - if ((regs)->trap & 1) \ - printk(KERN_CRIT "%s: partial register set\n", __func__); \ } while (0) #endif /* __powerpc64__ */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index fb849ef922fb39..7084289994b3de 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -81,12 +81,12 @@ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) */ .globl transfer_to_handler_full transfer_to_handler_full: - SAVE_NVGPRS(r11) _ASM_NOKPROBE_SYMBOL(transfer_to_handler_full) /* fall through */ .globl transfer_to_handler transfer_to_handler: + SAVE_NVGPRS(r11) stw r2,GPR2(r11) stw r12,_NIP(r11) stw r9,_MSR(r11) @@ -234,10 +234,6 @@ handle_page_fault: bl do_page_fault cmpwi r3,0 beq+ ret_from_except - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - clrrwi r0,r0,1 - stw r0,_TRAP(r1) mr r4,r3 /* err arg for bad_page_fault */ addi r3,r1,STACK_FRAME_OVERHEAD bl __bad_page_fault @@ -810,13 +806,6 @@ recheck: do_user_signal: /* r10 contains MSR_KERNEL here */ ori r10,r10,MSR_EE mtmsr r10 /* hard-enable interrupts */ - /* save r13-r31 in the exception frame, if not already done */ - lwz r3,_TRAP(r1) - andi. 
r0,r3,1 - beq 2f - SAVE_NVGPRS(r1) - rlwinm r3,r3,0,0,30 - stw r3,_TRAP(r1) 2: addi r3,r1,STACK_FRAME_OVERHEAD mr r4,r9 bl do_notify_resume diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index e09585b88ba7b9..087445e4548979 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -198,7 +198,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) ret_from_except_full) #define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler, \ ret_from_except) .macro vmap_stack_overflow_exception @@ -215,7 +215,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) lwz r1, emergency_ctx@l(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE EXCEPTION_PROLOG_2 vmap_stack_overflow - SAVE_NVGPRS(r11) addi r3, r1, STACK_FRAME_OVERHEAD EXC_XFER_STD(0, stack_overflow_exception) .endm diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 3707f49f0b78cf..b31bf9e833c0c1 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -331,7 +331,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) ret_from_except_full) #define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler, \ ret_from_except) /* Check for a single step debug exception while in an exception From b96bae3ae2cb6337c0a1ad160f4cbb0666e5e38b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:34 +0000 Subject: [PATCH 060/302] powerpc/32: Replace ASM exception exit by C exception exit from ppc64 This patch replaces the PPC32 ASM exception exit by C exception exit. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/48f8bae91da899d8e73fc0d75c9af66cc97b4d5b.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 436 +++++++++----------------------- arch/powerpc/kernel/interrupt.c | 6 +- 2 files changed, 123 insertions(+), 319 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 7084289994b3de..79311d0bd09bca 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -336,7 +336,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) fast_exception_return: #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) andi. 
r10,r9,MSR_RI /* check for recoverable interrupt */ - beq 1f /* if not, we've got problems */ + beq 3f /* if not, we've got problems */ #endif 2: REST_4GPRS(3, r11) @@ -363,30 +363,6 @@ fast_exception_return: #endif _ASM_NOKPROBE_SYMBOL(fast_exception_return) -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) -/* check if the exception happened in a restartable section */ -1: lis r3,exc_exit_restart_end@ha - addi r3,r3,exc_exit_restart_end@l - cmplw r12,r3 - bge 3f - lis r4,exc_exit_restart@ha - addi r4,r4,exc_exit_restart@l - cmplw r12,r4 - blt 3f - lis r3,fee_restarts@ha - tophys(r3,r3) - lwz r5,fee_restarts@l(r3) - addi r5,r5,1 - stw r5,fee_restarts@l(r3) - mr r12,r4 /* restart at exc_exit_restart */ - b 2b - - .section .bss - .align 2 -fee_restarts: - .space 4 - .previous - /* aargh, a nonrecoverable interrupt, panic */ /* aargh, we don't know which trap this is */ 3: @@ -395,8 +371,7 @@ fee_restarts: addi r3,r1,STACK_FRAME_OVERHEAD bl transfer_to_handler_full bl unrecoverable_exception - b ret_from_except -#endif + trap /* should not get here */ .globl ret_from_except_full ret_from_except_full: @@ -405,213 +380,145 @@ ret_from_except_full: .globl ret_from_except ret_from_except: - /* Hard-disable interrupts so that current_thread_info()->flags - * can't change between when we test it and when we return - * from the interrupt. */ - /* Note: We don't bother telling lockdep about it */ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) - mtmsr r10 /* disable interrupts */ - - lwz r3,_MSR(r1) /* Returning to user mode? */ - andi. r0,r3,MSR_PR - beq resume_kernel - -user_exc_return: /* r10 contains MSR_KERNEL here */ - /* Check current_thread_info()->flags */ - lwz r9,TI_FLAGS(r2) - andi. r0,r9,_TIF_USER_WORK_MASK - bne do_work - -restore_user: -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - /* Check whether this process has its own DBCR0 value. The internal - debug mode bit tells us that dbcr0 should be loaded. */ - lwz r0,THREAD+THREAD_DBCR0(r2) - andis. r10,r0,DBCR0_IDM@h - bnel- load_dbcr0 -#endif - ACCOUNT_CPU_USER_EXIT(r2, r10, r11) +_ASM_NOKPROBE_SYMBOL(ret_from_except) + + .globl interrupt_return +interrupt_return: + lwz r4,_MSR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + andi. r0,r4,MSR_PR + beq .Lkernel_interrupt_return + bl interrupt_exit_user_prepare + cmpwi r3,0 + bne- .Lrestore_nvgprs + +.Lfast_user_interrupt_return: #ifdef CONFIG_PPC_BOOK3S_32 kuep_unlock r10, r11 #endif + kuap_check r2, r4 + lwz r11,_NIP(r1) + lwz r12,_MSR(r1) + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 - b restore - -/* N.B. the only way to get here is from the beq following ret_from_except. */ -resume_kernel: - /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ - lwz r8,TI_FLAGS(r2) - andis. r0,r8,_TIF_EMULATE_STACK_STORE@h - beq+ 1f +BEGIN_FTR_SECTION + stwcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + lwarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ + lwz r3,_CCR(r1) + lwz r4,_LINK(r1) + lwz r5,_CTR(r1) + lwz r6,_XER(r1) + li r0,0 - lwz r3,GPR1(r1) - subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ - mr r4,r1 /* src: current exception frame */ - mr r1,r3 /* Reroute the trampoline frame to r1 */ + /* + * Leaving a stale exception_marker on the stack can confuse + * the reliable stack unwinder later on. Clear it. + */ + stw r0,8(r1) + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) - /* Copy from the original to the trampoline. 
*/ - li r5,INT_FRAME_SIZE/4 /* size: INT_FRAME_SIZE */ - li r6,0 /* start offset: 0 */ + mtcr r3 + mtlr r4 mtctr r5 -2: lwzx r0,r6,r4 - stwx r0,r6,r3 - addi r6,r6,4 - bdnz 2b - - /* Do real store operation to complete stwu */ - lwz r5,GPR1(r1) - stw r8,0(r5) - - /* Clear _TIF_EMULATE_STACK_STORE flag */ - lis r11,_TIF_EMULATE_STACK_STORE@h - addi r5,r2,TI_FLAGS -0: lwarx r8,0,r5 - andc r8,r8,r11 - stwcx. r8,0,r5 - bne- 0b -1: + mtspr SPRN_XER,r6 -#ifdef CONFIG_PREEMPTION - /* check current_thread_info->preempt_count */ - lwz r0,TI_PREEMPT(r2) - cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ - bne restore_kuap - andi. r8,r8,_TIF_NEED_RESCHED - beq+ restore_kuap - lwz r3,_MSR(r1) - andi. r0,r3,MSR_EE /* interrupts off? */ - beq restore_kuap /* don't schedule if so */ -#ifdef CONFIG_TRACE_IRQFLAGS - /* Lockdep thinks irqs are enabled, we need to call - * preempt_schedule_irq with IRQs off, so we inform lockdep - * now that we -did- turn them off already - */ - bl trace_hardirqs_off -#endif - bl preempt_schedule_irq -#ifdef CONFIG_TRACE_IRQFLAGS - /* And now, to properly rebalance the above, we tell lockdep they - * are being turned back on, which will happen when we return - */ - bl trace_hardirqs_on + REST_4GPRS(2, r1) + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ #endif -#endif /* CONFIG_PREEMPTION */ -restore_kuap: - kuap_restore r1, r2, r9, r10, r0 - /* interrupts are hard-disabled at this point */ -restore: -#if defined(CONFIG_44x) && !defined(CONFIG_PPC_47x) - lis r4,icache_44x_need_flush@ha - lwz r5,icache_44x_need_flush@l(r4) - cmplwi cr0,r5,0 - beq+ 1f - li r6,0 - iccci r0,r0 - stw r6,icache_44x_need_flush@l(r4) -1: -#endif /* CONFIG_44x */ - - lwz r9,_MSR(r1) -#ifdef CONFIG_TRACE_IRQFLAGS - /* Lockdep doesn't know about the fact that IRQs are temporarily turned - * off in this assembly code while peeking at TI_FLAGS() and such. However - * we need to inform it if the exception turned interrupts off, and we - * are about to trun them back on. - */ - andi. r10,r9,MSR_EE - beq 1f - stwu r1,-32(r1) - mflr r0 - stw r0,4(r1) - bl trace_hardirqs_on - addi r1, r1, 32 - lwz r9,_MSR(r1) -1: -#endif /* CONFIG_TRACE_IRQFLAGS */ +.Lrestore_nvgprs: + REST_NVGPRS(r1) + b .Lfast_user_interrupt_return - lwz r0,GPR0(r1) - lwz r2,GPR2(r1) - REST_4GPRS(3, r1) - REST_2GPRS(7, r1) +.Lkernel_interrupt_return: + bl interrupt_exit_kernel_prepare - lwz r10,_XER(r1) - lwz r11,_CTR(r1) - mtspr SPRN_XER,r10 - mtctr r11 +.Lfast_kernel_interrupt_return: + cmpwi cr1,r3,0 + kuap_restore r1, r2, r3, r4, r5 + lwz r11,_NIP(r1) + lwz r12,_MSR(r1) + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 BEGIN_FTR_SECTION - lwarx r11,0,r1 -END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) - stwcx. r0,0,r1 /* to clear the reservation */ + stwcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + lwarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) - andi. 
r10,r9,MSR_RI /* check if this exception occurred */ - beql nonrecoverable /* at a bad place (MSR:RI = 0) */ + lwz r3,_LINK(r1) + lwz r4,_CTR(r1) + lwz r5,_XER(r1) + lwz r6,_CCR(r1) + li r0,0 - lwz r10,_CCR(r1) - lwz r11,_LINK(r1) - mtcrf 0xFF,r10 - mtlr r11 + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) + + mtlr r3 + mtctr r4 + mtspr SPRN_XER,r5 - /* Clear the exception_marker on the stack to avoid confusing stacktrace */ - li r10, 0 - stw r10, 8(r1) /* - * Once we put values in SRR0 and SRR1, we are in a state - * where exceptions are not recoverable, since taking an - * exception will trash SRR0 and SRR1. Therefore we clear the - * MSR:RI bit to indicate this. If we do take an exception, - * we can't return to the point of the exception but we - * can restart the exception exit path at the label - * exc_exit_restart below. -- paulus + * Leaving a stale exception_marker on the stack can confuse + * the reliable stack unwinder later on. Clear it. */ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI) - mtmsr r10 /* clear the RI bit */ - .globl exc_exit_restart -exc_exit_restart: - lwz r12,_NIP(r1) - mtspr SPRN_SRR0,r12 - mtspr SPRN_SRR1,r9 - REST_4GPRS(9, r1) - lwz r1,GPR1(r1) - .globl exc_exit_restart_end -exc_exit_restart_end: + stw r0,8(r1) + + REST_4GPRS(2, r1) + + bne- cr1,1f /* emulate stack store */ + mtcr r6 + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) rfi -_ASM_NOKPROBE_SYMBOL(exc_exit_restart) -_ASM_NOKPROBE_SYMBOL(exc_exit_restart_end) +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif -#else /* !(CONFIG_4xx || CONFIG_BOOKE) */ - /* - * This is a bit different on 4xx/Book-E because it doesn't have - * the RI bit in the MSR. - * The TLB miss handler checks if we have interrupted - * the exception exit path and restarts it if so - * (well maybe one day it will... :). +1: /* + * Emulate stack store with update. New r1 value was already calculated + * and updated in our interrupt regs by emulate_loadstore, but we can't + * store the previous value of r1 to the stack before re-loading our + * registers from it, otherwise they could be clobbered. Use + * SPRG Scratch0 as temporary storage to hold the store + * data, as interrupts are disabled here so it won't be clobbered. */ - lwz r11,_LINK(r1) - mtlr r11 - lwz r10,_CCR(r1) - mtcrf 0xff,r10 - /* Clear the exception_marker on the stack to avoid confusing stacktrace */ - li r10, 0 - stw r10, 8(r1) - REST_2GPRS(9, r1) - .globl exc_exit_restart -exc_exit_restart: - lwz r11,_NIP(r1) - lwz r12,_MSR(r1) - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 - REST_2GPRS(11, r1) - lwz r1,GPR1(r1) - .globl exc_exit_restart_end -exc_exit_restart_end: + mtcr r6 +#ifdef CONFIG_BOOKE + mtspr SPRN_SPRG_WSCRATCH0, r9 +#else + mtspr SPRN_SPRG_SCRATCH0, r9 +#endif + addi r9,r1,INT_FRAME_SIZE /* get original r1 */ + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + stw r9,0(r1) /* perform store component of stwu */ +#ifdef CONFIG_BOOKE + mfspr r9, SPRN_SPRG_RSCRATCH0 +#else + mfspr r9, SPRN_SPRG_SCRATCH0 +#endif rfi - b . /* prevent prefetch past rfi */ -_ASM_NOKPROBE_SYMBOL(exc_exit_restart) +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif +_ASM_NOKPROBE_SYMBOL(interrupt_return) + +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) /* * Returning from a critical interrupt in user mode doesn't need @@ -642,8 +549,7 @@ _ASM_NOKPROBE_SYMBOL(exc_exit_restart) REST_NVGPRS(r1); \ lwz r3,_MSR(r1); \ andi. 
r3,r3,MSR_PR; \ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \ - bne user_exc_return; \ + bne interrupt_return; \ lwz r0,GPR0(r1); \ lwz r2,GPR2(r1); \ REST_4GPRS(3, r1); \ @@ -746,114 +652,8 @@ ret_from_mcheck_exc: RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI) _ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc) #endif /* CONFIG_BOOKE */ - -/* - * Load the DBCR0 value for a task that is being ptraced, - * having first saved away the global DBCR0. Note that r0 - * has the dbcr0 value to set upon entry to this. - */ -load_dbcr0: - mfmsr r10 /* first disable debug exceptions */ - rlwinm r10,r10,0,~MSR_DE - mtmsr r10 - isync - mfspr r10,SPRN_DBCR0 - lis r11,global_dbcr0@ha - addi r11,r11,global_dbcr0@l -#ifdef CONFIG_SMP - lwz r9,TASK_CPU(r2) - slwi r9,r9,2 - add r11,r11,r9 -#endif - stw r10,0(r11) - mtspr SPRN_DBCR0,r0 - li r11,-1 - mtspr SPRN_DBSR,r11 /* clear all pending debug events */ - blr - - .section .bss - .align 4 - .global global_dbcr0 -global_dbcr0: - .space 4*NR_CPUS - .previous #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ -do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED - beq do_user_signal - -do_resched: /* r10 contains MSR_KERNEL here */ -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on - mfmsr r10 -#endif - ori r10,r10,MSR_EE - mtmsr r10 /* hard-enable interrupts */ - bl schedule -recheck: - /* Note: And we don't tell it we are disabling them again - * neither. Those disable/enable cycles used to peek at - * TI_FLAGS aren't advertised. - */ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) - mtmsr r10 /* disable interrupts */ - lwz r9,TI_FLAGS(r2) - andi. r0,r9,_TIF_NEED_RESCHED - bne- do_resched - andi. r0,r9,_TIF_USER_WORK_MASK - beq restore_user -do_user_signal: /* r10 contains MSR_KERNEL here */ - ori r10,r10,MSR_EE - mtmsr r10 /* hard-enable interrupts */ -2: addi r3,r1,STACK_FRAME_OVERHEAD - mr r4,r9 - bl do_notify_resume - REST_NVGPRS(r1) - b recheck - -/* - * We come here when we are at the end of handling an exception - * that occurred at a place where taking an exception will lose - * state information, such as the contents of SRR0 and SRR1. - */ -nonrecoverable: - lis r10,exc_exit_restart_end@ha - addi r10,r10,exc_exit_restart_end@l - cmplw r12,r10 - bge 3f - lis r11,exc_exit_restart@ha - addi r11,r11,exc_exit_restart@l - cmplw r12,r11 - blt 3f - lis r10,ee_restarts@ha - lwz r12,ee_restarts@l(r10) - addi r12,r12,1 - stw r12,ee_restarts@l(r10) - mr r12,r11 /* restart at exc_exit_restart */ - blr -3: /* OK, we can't recover, kill this process */ - lwz r3,_TRAP(r1) - andi. r0,r3,1 - beq 5f - SAVE_NVGPRS(r1) - rlwinm r3,r3,0,0,30 - stw r3,_TRAP(r1) -5: mfspr r2,SPRN_SPRG_THREAD - addi r2,r2,-THREAD - tovirt(r2,r2) /* set back r2 to current */ -4: addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - /* shouldn't return */ - b 4b -_ASM_NOKPROBE_SYMBOL(nonrecoverable) - - .section .bss - .align 2 -ee_restarts: - .space 4 - .previous - /* * PROM code for specific machines follows. Put it * here so it's easy to add arch-specific sections later. 
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index c475a229a42ac4..6875b82f613a1f 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -20,6 +20,10 @@
 #include
 #include
+#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
+unsigned long global_dbcr0[NR_CPUS];
+#endif
+
 typedef long (*syscall_fn)(long, long, long, long, long, long);
 /* Has to run notrace because it is entered not completely "reconciled" */
@@ -392,7 +396,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
 		ti_flags = READ_ONCE(current_thread_info()->flags);
 	}
-	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
 		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
 		    unlikely((ti_flags & _TIF_RESTORE_TM))) {
 			restore_tm_state(regs);

From db297c3b07af7856fb7c666fbc9792d8e37556be Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Fri, 12 Mar 2021 12:50:35 +0000
Subject: [PATCH 061/302] powerpc/32: Don't save thread.regs on interrupt entry

Since commit 06d67d54741a ("powerpc: make process.c suitable for both
32-bit and 64-bit"), thread.regs is set on task creation, so there is no
need to set it again and again at each interrupt entry, as it never
changes.

Suggested-by: Nicholas Piggin
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20d52c627303d63e461797df13e6890fc04017d0.1615552867.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/entry_32.S | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 79311d0bd09bca..1e201fc4a590cd 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -97,10 +97,8 @@ transfer_to_handler:
 	stw	r2,_XER(r11)
 	mfspr	r12,SPRN_SPRG_THREAD
 	tovirt(r12, r12)
-	beq	2f			/* if from user, fix up THREAD.regs */
+	beq	2f
 	addi	r2, r12, -THREAD
-	addi	r11,r1,STACK_FRAME_OVERHEAD
-	stw	r11,PT_REGS(r12)
 #ifdef CONFIG_PPC_BOOK3S_32
 	kuep_lock r11, r12
 #endif
@@ -147,7 +145,6 @@ transfer_to_syscall:
 	/* Calling convention has r9 = orig r0, r10 = regs */
 	addi	r10,r1,STACK_FRAME_OVERHEAD
 	mr	r9,r0
-	stw	r10,THREAD+PT_REGS(r2)
 	bl	system_call_exception
ret_from_syscall:

From e72915560b15f58c2ffe08144d9a7163daa18db4 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Fri, 12 Mar 2021 12:50:36 +0000
Subject: [PATCH 062/302] powerpc/32: Set regs parameter in r3 in transfer_to_handler

All exception handlers take regs as their first parameter. Instead of
setting r3 just before each call to a handler, set it in
transfer_to_handler.
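It is worth spelling out why one assignment in the common path is enough: in the 32-bit PowerPC ELF ABI the first argument of a C function is passed in r3, so once transfer_to_handler has loaded r3 with the address of the saved register frame, every subsequent bl to a C handler already receives regs as its first parameter. A minimal sketch of the convention (do_IRQ is simply one example of a regs-taking handler from this series):

        addi    r3, r1, STACK_FRAME_OVERHEAD    /* r3 = address of saved pt_regs */
        bl      do_IRQ                          /* C handler sees it as its first argument */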
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f994a379bb895a2cbd518cb82460ad3f3d3ccdf5.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 3 +-- arch/powerpc/kernel/head_32.h | 2 -- arch/powerpc/kernel/head_40x.S | 7 ------- arch/powerpc/kernel/head_8xx.S | 3 --- arch/powerpc/kernel/head_book3s_32.S | 9 ++------- arch/powerpc/kernel/head_booke.h | 11 +---------- arch/powerpc/kernel/head_fsl_booke.S | 4 +--- 7 files changed, 5 insertions(+), 34 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 1e201fc4a590cd..e538ae73394d92 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -87,6 +87,7 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_full) .globl transfer_to_handler transfer_to_handler: SAVE_NVGPRS(r11) + addi r3,r1,STACK_FRAME_OVERHEAD stw r2,GPR2(r11) stw r12,_NIP(r11) stw r9,_MSR(r11) @@ -227,7 +228,6 @@ ret_from_kernel_thread: */ .globl handle_page_fault handle_page_fault: - addi r3,r1,STACK_FRAME_OVERHEAD bl do_page_fault cmpwi r3,0 beq+ ret_from_except @@ -365,7 +365,6 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return) 3: li r10,-1 stw r10,_TRAP(r11) - addi r3,r1,STACK_FRAME_OVERHEAD bl transfer_to_handler_full bl unrecoverable_exception trap /* should not get here */ diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 087445e4548979..4d638d760a96eb 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -183,7 +183,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #define EXCEPTION(n, label, hdlr, xfer) \ START_EXCEPTION(n, label) \ EXCEPTION_PROLOG label; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ xfer(n, hdlr) #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ @@ -215,7 +214,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) lwz r1, emergency_ctx@l(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE EXCEPTION_PROLOG_2 vmap_stack_overflow - addi r3, r1, STACK_FRAME_OVERHEAD EXC_XFER_STD(0, stack_overflow_exception) .endm diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 86883ccb3dc57b..08563d4170c6dd 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -185,7 +185,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #define CRITICAL_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label); \ CRITICAL_EXCEPTION_PROLOG label; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ crit_transfer_to_handler, ret_from_crit_exc) @@ -227,13 +226,11 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) EXCEPTION_PROLOG Alignment handle_dar_dsisr=1 - addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) EXCEPTION_PROLOG ProgramCheck handle_dar_dsisr=1 - addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x700, program_check_exception) EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD) @@ -494,7 +491,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) /* continue normal handling for a critical exception... 
*/ 2: mfspr r4,SPRN_DBSR stw r4,_ESR(r11) /* DebugException takes DBSR in _ESR */ - addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_TEMPLATE(DebugException, 0x2002, \ (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ crit_transfer_to_handler, ret_from_crit_exc) @@ -505,21 +501,18 @@ Decrementer: EXCEPTION_PROLOG Decrementer lis r0,TSR_PIS@h mtspr SPRN_TSR,r0 /* Clear the PIT exception */ - addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_LITE(0x1000, timer_interrupt) /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ __HEAD FITException: EXCEPTION_PROLOG FITException - addi r3,r1,STACK_FRAME_OVERHEAD; EXC_XFER_STD(0x1010, unknown_exception) /* Watchdog Timer (WDT) Exception. (from 0x1020) */ __HEAD WDTException: CRITICAL_EXCEPTION_PROLOG WDTException - addi r3,r1,STACK_FRAME_OVERHEAD; EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 932702a38234dd..eb1d40a8f2c4e4 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -123,7 +123,6 @@ instruction_counter: /* Machine check */ START_EXCEPTION(0x200, MachineCheck) EXCEPTION_PROLOG MachineCheck handle_dar_dsisr=1 - addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x200, machine_check_exception) /* External interrupt */ @@ -132,7 +131,6 @@ instruction_counter: /* Alignment exception */ START_EXCEPTION(0x600, Alignment) EXCEPTION_PROLOG Alignment handle_dar_dsisr=1 - addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ @@ -348,7 +346,6 @@ DARFixed:/* Return from dcbx instruction bug workaround */ 1: EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 DataBreakpoint handle_dar_dsisr=1 - addi r3,r1,STACK_FRAME_OVERHEAD mfspr r4,SPRN_BAR stw r4,_DAR(r11) EXC_XFER_STD(0x1c00, do_break) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 4ff67c5cae1ca3..453f6ea959f907 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -267,7 +267,6 @@ __secondary_hold_acknowledge: #endif /* CONFIG_PPC_CHRP */ EXCEPTION_PROLOG_1 7: EXCEPTION_PROLOG_2 MachineCheck - addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP beq cr1, 1f twi 31, 0, 0 @@ -337,7 +336,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) /* Alignment exception */ START_EXCEPTION(0x600, Alignment) EXCEPTION_PROLOG Alignment handle_dar_dsisr=1 - addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ @@ -357,8 +355,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) beq 1f bl load_up_fpu /* if from user, just load it up */ b fast_exception_return -1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) +1: EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) #else b ProgramCheck #endif @@ -722,13 +719,11 @@ AltiVecUnavailable: bl load_up_altivec /* if from user, just load it up */ b fast_exception_return #endif /* CONFIG_ALTIVEC */ -1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0xf20, altivec_unavailable_exception) +1: EXC_XFER_LITE(0xf20, altivec_unavailable_exception) __HEAD PerformanceMonitor: EXCEPTION_PROLOG PerformanceMonitor - addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0xf00, performance_monitor_exception) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index b31bf9e833c0c1..009a56d70d76a7 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ 
-294,13 +294,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define EXCEPTION(n, intno, label, hdlr, xfer) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(intno); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ xfer(n, hdlr) #define CRITICAL_EXCEPTION(n, intno, label, hdlr) \ START_EXCEPTION(label); \ CRITICAL_EXCEPTION_PROLOG(intno); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ SAVE_MMU_REGS; \ SAVE_xSRR(SRR); \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ @@ -311,7 +309,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) MCHECK_EXCEPTION_PROLOG; \ mfspr r5,SPRN_ESR; \ stw r5,_ESR(r11); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ SAVE_xSRR(DSRR); \ SAVE_xSRR(CSRR); \ SAVE_MMU_REGS; \ @@ -398,7 +395,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) /* continue normal handling for a debug exception... */ \ 2: mfspr r4,SPRN_DBSR; \ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ - addi r3,r1,STACK_FRAME_OVERHEAD; \ SAVE_xSRR(CSRR); \ SAVE_MMU_REGS; \ SAVE_xSRR(SRR); \ @@ -455,7 +451,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) /* continue normal handling for a critical exception... */ \ 2: mfspr r4,SPRN_DBSR; \ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ - addi r3,r1,STACK_FRAME_OVERHEAD; \ SAVE_MMU_REGS; \ SAVE_xSRR(SRR); \ EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) @@ -482,7 +477,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ stw r4,_DEAR(r11); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_STD(0x0600, alignment_exception) #define PROGRAM_EXCEPTION \ @@ -490,7 +484,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) NORMAL_EXCEPTION_PROLOG(PROGRAM); \ mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ stw r4,_ESR(r11); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_STD(0x0700, program_check_exception) #define DECREMENTER_EXCEPTION \ @@ -498,7 +491,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) NORMAL_EXCEPTION_PROLOG(DECREMENTER); \ lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \ mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_LITE(0x0900, timer_interrupt) #define FP_UNAVAILABLE_EXCEPTION \ @@ -507,8 +499,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) beq 1f; \ bl load_up_fpu; /* if from user, just load it up */ \ b fast_exception_return; \ -1: addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_STD(0x800, kernel_fp_unavailable_exception) +1: EXC_XFER_STD(0x800, kernel_fp_unavailable_exception) #else /* __ASSEMBLY__ */ struct exception_regs { diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 3f4a40cccef5df..f51c66f747ad27 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -372,7 +372,6 @@ interrupt_base: bne 1f EXC_XFER_LITE(0x0300, handle_page_fault) 1: - addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_LITE(0x0300, CacheLockingException) /* Instruction Storage Interrupt */ @@ -618,8 +617,7 @@ END_BTB_FLUSH_SECTION beq 1f bl load_up_spe b fast_exception_return -1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0x2010, KernelSPE) +1: EXC_XFER_LITE(0x2010, KernelSPE) #elif defined(CONFIG_SPE_POSSIBLE) EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ unknown_exception, EXC_XFER_STD) From af6f2ce84b2f666762f75f085a7e5d6514743a84 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:37 +0000 Subject: [PATCH 063/302] powerpc/32: Call 
bad_page_fault() from do_page_fault() Now that non volatile registers are saved at all time, no need to split bad_page_fault() out of do_page_fault(). Remove handle_page_fault() and use do_page_fault() directly. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cfb95be8863204cc2bf45a22ea44dd1d0dc16b7f.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 16 ---------------- arch/powerpc/kernel/head_40x.S | 4 ++-- arch/powerpc/kernel/head_8xx.S | 4 ++-- arch/powerpc/kernel/head_book3s_32.S | 4 ++-- arch/powerpc/kernel/head_booke.h | 4 ++-- arch/powerpc/kernel/head_fsl_booke.S | 2 +- arch/powerpc/mm/fault.c | 2 +- 7 files changed, 10 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e538ae73394d92..76e1502b3e6f73 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -220,22 +220,6 @@ ret_from_kernel_thread: li r3,0 b ret_from_syscall -/* - * Top-level page fault handling. - * This is in assembler because if do_page_fault tells us that - * it is a bad kernel page fault, we want to save the non-volatile - * registers before calling bad_page_fault. - */ - .globl handle_page_fault -handle_page_fault: - bl do_page_fault - cmpwi r3,0 - beq+ ret_from_except - mr r4,r3 /* err arg for bad_page_fault */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl __bad_page_fault - b ret_from_except_full - /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 08563d4170c6dd..a657783807043d 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -207,7 +207,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) */ START_EXCEPTION(0x0300, DataStorage) EXCEPTION_PROLOG DataStorage handle_dar_dsisr=1 - EXC_XFER_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, do_page_fault) /* * 0x0400 - Instruction Storage Exception @@ -218,7 +218,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) li r5,0 stw r5, _ESR(r11) /* Zero ESR */ stw r12, _DEAR(r11) /* SRR0 as DEAR */ - EXC_XFER_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, do_page_fault) /* 0x0500 - External Interrupt Exception */ EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index eb1d40a8f2c4e4..4078d0dc2f1893 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -301,7 +301,7 @@ instruction_counter: .Litlbie: stw r12, _DAR(r11) stw r5, _DSISR(r11) - EXC_XFER_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, do_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We bail out to @@ -322,7 +322,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ tlbie r4 .Ldtlbie: /* 0x300 is DataAccess exception, needed by bad_page_fault() */ - EXC_XFER_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, do_page_fault) #ifdef CONFIG_VMAP_STACK vmap_stack_overflow_exception diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 453f6ea959f907..e78b4a7c23af6e 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -299,7 +299,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) lwz r5, _DSISR(r11) andis. 
r0, r5, DSISR_DABRMATCH@h bne- 1f - EXC_XFER_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, do_page_fault) 1: EXC_XFER_STD(0x300, do_break) @@ -328,7 +328,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ stw r5, _DSISR(r11) stw r12, _DAR(r11) - EXC_XFER_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, do_page_fault) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 009a56d70d76a7..036a69d1660599 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -462,7 +462,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ stw r4, _DEAR(r11); \ - EXC_XFER_LITE(0x0300, handle_page_fault) + EXC_XFER_LITE(0x0300, do_page_fault) #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ @@ -470,7 +470,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ stw r12, _DEAR(r11); /* Pass SRR0 as arg2 */ \ - EXC_XFER_LITE(0x0400, handle_page_fault) + EXC_XFER_LITE(0x0400, do_page_fault) #define ALIGNMENT_EXCEPTION \ START_EXCEPTION(Alignment) \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index f51c66f747ad27..72e9aa45b99b7f 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -370,7 +370,7 @@ interrupt_base: stw r4, _DEAR(r11) andis. r10,r5,(ESR_ILK|ESR_DLK)@h bne 1f - EXC_XFER_LITE(0x0300, handle_page_fault) + EXC_XFER_LITE(0x0300, do_page_fault) 1: EXC_XFER_LITE(0x0300, CacheLockingException) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index bea13682c9092a..0d4e4ff77e03ac 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -557,7 +557,7 @@ static long __do_page_fault(struct pt_regs *regs) if (likely(entry)) { instruction_pointer_set(regs, extable_fixup(entry)); return 0; - } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { + } else if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64)) { __bad_page_fault(regs, err); return 0; } else { From 719e7e212c7e637a795f130dbdd5db6c291e463f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:38 +0000 Subject: [PATCH 064/302] powerpc/32: Save trap number on stack in exception prolog Saving the trap number into the stack goes into the exception prolog, as EXC_XFER_xxx will soon disappear. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2ac7a0c9cde2ec2b23cd79e3a54cfedd816a91ae.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 14 ++++----- arch/powerpc/kernel/head_40x.S | 22 +++++++------- arch/powerpc/kernel/head_8xx.S | 14 ++++----- arch/powerpc/kernel/head_book3s_32.S | 14 ++++----- arch/powerpc/kernel/head_booke.h | 44 +++++++++++++++------------- arch/powerpc/kernel/head_fsl_booke.S | 4 +-- 6 files changed, 58 insertions(+), 54 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 4d638d760a96eb..bf4c288173ad4f 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -10,10 +10,10 @@ * We assume sprg3 has the physical address of the current * task's thread_struct. 
*/ -.macro EXCEPTION_PROLOG name handle_dar_dsisr=0 +.macro EXCEPTION_PROLOG trapno name handle_dar_dsisr=0 EXCEPTION_PROLOG_0 handle_dar_dsisr=\handle_dar_dsisr EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 \name handle_dar_dsisr=\handle_dar_dsisr + EXCEPTION_PROLOG_2 \trapno \name handle_dar_dsisr=\handle_dar_dsisr .endm .macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0 @@ -56,7 +56,7 @@ #endif .endm -.macro EXCEPTION_PROLOG_2 name handle_dar_dsisr=0 +.macro EXCEPTION_PROLOG_2 trapno name handle_dar_dsisr=0 #ifdef CONFIG_PPC_8xx .if \handle_dar_dsisr li r11, RPN_PATTERN @@ -108,6 +108,8 @@ lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addi r10,r10,STACK_FRAME_REGS_MARKER@l stw r10,8(r11) + li r10, \trapno + stw r10,_TRAP(r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) _ASM_NOKPROBE_SYMBOL(\name\()_virt) @@ -182,12 +184,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #define EXCEPTION(n, label, hdlr, xfer) \ START_EXCEPTION(n, label) \ - EXCEPTION_PROLOG label; \ + EXCEPTION_PROLOG n label; \ xfer(n, hdlr) #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ bl tfer; \ bl hdlr; \ b ret @@ -213,7 +213,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #endif lwz r1, emergency_ctx@l(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE - EXCEPTION_PROLOG_2 vmap_stack_overflow + EXCEPTION_PROLOG_2 0 vmap_stack_overflow EXC_XFER_STD(0, stack_overflow_exception) .endm diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a657783807043d..7270caff665c52 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -104,7 +104,7 @@ _ENTRY(crit_esr) * Instead we use a couple of words of memory at low physical addresses. * This is OK since we don't support SMP on these processors. */ -.macro CRITICAL_EXCEPTION_PROLOG name +.macro CRITICAL_EXCEPTION_PROLOG trapno name stw r10,crit_r10@l(0) /* save two registers to work with */ stw r11,crit_r11@l(0) mfspr r10,SPRN_SRR0 @@ -161,6 +161,8 @@ _ENTRY(crit_esr) lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addi r10, r10, STACK_FRAME_REGS_MARKER@l stw r10, 8(r11) + li r10, \trapno + 2 + stw r10,_TRAP(r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) _ASM_NOKPROBE_SYMBOL(\name\()_virt) @@ -184,7 +186,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) */ #define CRITICAL_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label); \ - CRITICAL_EXCEPTION_PROLOG label; \ + CRITICAL_EXCEPTION_PROLOG n label; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ crit_transfer_to_handler, ret_from_crit_exc) @@ -206,7 +208,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) * if they can't resolve the lightweight TLB fault. */ START_EXCEPTION(0x0300, DataStorage) - EXCEPTION_PROLOG DataStorage handle_dar_dsisr=1 + EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1 EXC_XFER_LITE(0x300, do_page_fault) /* @@ -214,7 +216,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) * This is caused by a fetch from non-execute or guarded pages. 
*/ START_EXCEPTION(0x0400, InstructionAccess) - EXCEPTION_PROLOG InstructionAccess + EXCEPTION_PROLOG 0x400 InstructionAccess li r5,0 stw r5, _ESR(r11) /* Zero ESR */ stw r12, _DEAR(r11) /* SRR0 as DEAR */ @@ -225,12 +227,12 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) - EXCEPTION_PROLOG Alignment handle_dar_dsisr=1 + EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 EXC_XFER_STD(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) - EXCEPTION_PROLOG ProgramCheck handle_dar_dsisr=1 + EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1 EXC_XFER_STD(0x700, program_check_exception) EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD) @@ -449,7 +451,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) */ /* 0x2000 - Debug Exception */ START_EXCEPTION(0x2000, DebugTrap) - CRITICAL_EXCEPTION_PROLOG DebugTrap + CRITICAL_EXCEPTION_PROLOG 0x2000 DebugTrap /* * If this is a single step or branch-taken exception in an @@ -498,7 +500,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ __HEAD Decrementer: - EXCEPTION_PROLOG Decrementer + EXCEPTION_PROLOG 0x1000 Decrementer lis r0,TSR_PIS@h mtspr SPRN_TSR,r0 /* Clear the PIT exception */ EXC_XFER_LITE(0x1000, timer_interrupt) @@ -506,13 +508,13 @@ Decrementer: /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ __HEAD FITException: - EXCEPTION_PROLOG FITException + EXCEPTION_PROLOG 0x1010 FITException EXC_XFER_STD(0x1010, unknown_exception) /* Watchdog Timer (WDT) Exception. (from 0x1020) */ __HEAD WDTException: - CRITICAL_EXCEPTION_PROLOG WDTException + CRITICAL_EXCEPTION_PROLOG 0x1020 WDTException EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 4078d0dc2f1893..c48de97f42fc13 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -122,7 +122,7 @@ instruction_counter: /* Machine check */ START_EXCEPTION(0x200, MachineCheck) - EXCEPTION_PROLOG MachineCheck handle_dar_dsisr=1 + EXCEPTION_PROLOG 0x200 MachineCheck handle_dar_dsisr=1 EXC_XFER_STD(0x200, machine_check_exception) /* External interrupt */ @@ -130,7 +130,7 @@ instruction_counter: /* Alignment exception */ START_EXCEPTION(0x600, Alignment) - EXCEPTION_PROLOG Alignment handle_dar_dsisr=1 + EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ @@ -292,12 +292,12 @@ instruction_counter: * addresses. There is nothing to do but handle a big time error fault. */ START_EXCEPTION(0x1300, InstructionTLBError) - EXCEPTION_PROLOG InstructionTLBError + /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ + EXCEPTION_PROLOG 0x400 InstructionTLBError andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ andis. r10,r9,SRR1_ISI_NOPT@h beq+ .Litlbie tlbie r12 - /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ .Litlbie: stw r12, _DAR(r11) stw r5, _DSISR(r11) @@ -314,14 +314,14 @@ instruction_counter: beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. 
*/ DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 DataTLBError handle_dar_dsisr=1 + /* 0x300 is DataAccess exception, needed by bad_page_fault() */ + EXCEPTION_PROLOG_2 0x300 DataTLBError handle_dar_dsisr=1 lwz r4, _DAR(r11) lwz r5, _DSISR(r11) andis. r10,r5,DSISR_NOHPTE@h beq+ .Ldtlbie tlbie r4 .Ldtlbie: - /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, do_page_fault) #ifdef CONFIG_VMAP_STACK @@ -345,7 +345,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ rfi 1: EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 DataBreakpoint handle_dar_dsisr=1 + EXCEPTION_PROLOG_2 0x1c00 DataBreakpoint handle_dar_dsisr=1 mfspr r4,SPRN_BAR stw r4,_DAR(r11) EXC_XFER_STD(0x1c00, do_break) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index e78b4a7c23af6e..2b4875ceaea43e 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -266,7 +266,7 @@ __secondary_hold_acknowledge: mfspr r1, SPRN_SPRG_SCRATCH2 #endif /* CONFIG_PPC_CHRP */ EXCEPTION_PROLOG_1 -7: EXCEPTION_PROLOG_2 MachineCheck +7: EXCEPTION_PROLOG_2 0x200 MachineCheck #ifdef CONFIG_PPC_CHRP beq cr1, 1f twi 31, 0, 0 @@ -295,7 +295,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) #endif 1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 DataAccess handle_dar_dsisr=1 + EXCEPTION_PROLOG_2 0x300 DataAccess handle_dar_dsisr=1 lwz r5, _DSISR(r11) andis. r0, r5, DSISR_DABRMATCH@h bne- 1f @@ -324,7 +324,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) andi. r11, r11, MSR_PR EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 InstructionAccess + EXCEPTION_PROLOG_2 0x400 InstructionAccess andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ stw r5, _DSISR(r11) stw r12, _DAR(r11) @@ -335,7 +335,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) /* Alignment exception */ START_EXCEPTION(0x600, Alignment) - EXCEPTION_PROLOG Alignment handle_dar_dsisr=1 + EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ @@ -351,7 +351,7 @@ BEGIN_FTR_SECTION */ b ProgramCheck END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) - EXCEPTION_PROLOG FPUnavailable + EXCEPTION_PROLOG 0x800 FPUnavailable beq 1f bl load_up_fpu /* if from user, just load it up */ b fast_exception_return @@ -713,7 +713,7 @@ fast_hash_page_return: __HEAD AltiVecUnavailable: - EXCEPTION_PROLOG AltiVecUnavailable + EXCEPTION_PROLOG 0xf20 AltiVecUnavailable #ifdef CONFIG_ALTIVEC beq 1f bl load_up_altivec /* if from user, just load it up */ @@ -723,7 +723,7 @@ AltiVecUnavailable: __HEAD PerformanceMonitor: - EXCEPTION_PROLOG PerformanceMonitor + EXCEPTION_PROLOG 0xf00 PerformanceMonitor EXC_XFER_STD(0xf00, performance_monitor_exception) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 036a69d1660599..27a7358c04bb75 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -44,7 +44,7 @@ END_BTB_FLUSH_SECTION #endif -#define NORMAL_EXCEPTION_PROLOG(intno) \ +#define NORMAL_EXCEPTION_PROLOG(trapno, intno) \ mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \ mfspr r10, SPRN_SPRG_THREAD; \ stw r11, THREAD_NORMSAVE(0)(r10); \ @@ -82,6 +82,8 @@ END_BTB_FLUSH_SECTION lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \ addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ stw r10, 8(r11); \ + li r10, trapno; \ + stw r10,_TRAP(r11); \ SAVE_4GPRS(3, r11); \ 
SAVE_2GPRS(7, r11) @@ -182,7 +184,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) * registers as the normal prolog above. Instead we use a portion of the * critical/machine check exception stack at low physical addresses. */ -#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \ +#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, trapno, intno, exc_level_srr0, exc_level_srr1) \ mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \ BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \ stw r9,GPR9(r8); /* save various registers */\ @@ -225,6 +227,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r1,0(r11); \ mr r1,r11; \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ + li r10, trapno; \ + stw r10,_TRAP(r11); \ stw r0,GPR0(r11); \ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) @@ -259,12 +263,12 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #endif .endm -#define CRITICAL_EXCEPTION_PROLOG(intno) \ - EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1) -#define DEBUG_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1) -#define MCHECK_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \ +#define CRITICAL_EXCEPTION_PROLOG(trapno, intno) \ + EXC_LEVEL_EXCEPTION_PROLOG(CRIT, trapno+2, intno, SPRN_CSRR0, SPRN_CSRR1) +#define DEBUG_EXCEPTION_PROLOG(trapno) \ + EXC_LEVEL_EXCEPTION_PROLOG(DBG, trapno+8, DEBUG, SPRN_DSRR0, SPRN_DSRR1) +#define MCHECK_EXCEPTION_PROLOG(trapno) \ + EXC_LEVEL_EXCEPTION_PROLOG(MC, trapno+4, MACHINE_CHECK, \ SPRN_MCSRR0, SPRN_MCSRR1) /* @@ -293,12 +297,12 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define EXCEPTION(n, intno, label, hdlr, xfer) \ START_EXCEPTION(label); \ - NORMAL_EXCEPTION_PROLOG(intno); \ + NORMAL_EXCEPTION_PROLOG(n, intno); \ xfer(n, hdlr) #define CRITICAL_EXCEPTION(n, intno, label, hdlr) \ START_EXCEPTION(label); \ - CRITICAL_EXCEPTION_PROLOG(intno); \ + CRITICAL_EXCEPTION_PROLOG(n, intno); \ SAVE_MMU_REGS; \ SAVE_xSRR(SRR); \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ @@ -306,7 +310,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define MCHECK_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(label); \ - MCHECK_EXCEPTION_PROLOG; \ + MCHECK_EXCEPTION_PROLOG(n); \ mfspr r5,SPRN_ESR; \ stw r5,_ESR(r11); \ SAVE_xSRR(DSRR); \ @@ -317,8 +321,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mcheck_transfer_to_handler, ret_from_mcheck_exc) #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ bl tfer; \ bl hdlr; \ b ret; \ @@ -346,7 +348,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) */ #define DEBUG_DEBUG_EXCEPTION \ START_EXCEPTION(DebugDebug); \ - DEBUG_EXCEPTION_PROLOG; \ + DEBUG_EXCEPTION_PROLOG(2000); \ \ /* \ * If there is a single step or branch-taken exception in an \ @@ -402,7 +404,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define DEBUG_CRIT_EXCEPTION \ START_EXCEPTION(DebugCrit); \ - CRITICAL_EXCEPTION_PROLOG(DEBUG); \ + CRITICAL_EXCEPTION_PROLOG(2000,DEBUG); \ \ /* \ * If there is a single step or branch-taken exception in an \ @@ -457,7 +459,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define DATA_STORAGE_EXCEPTION \ START_EXCEPTION(DataStorage) \ - NORMAL_EXCEPTION_PROLOG(DATA_STORAGE); \ + NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ @@ -466,7 +468,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define INSTRUCTION_STORAGE_EXCEPTION \ 
START_EXCEPTION(InstructionStorage) \ - NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \ + NORMAL_EXCEPTION_PROLOG(0x400, INST_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ stw r12, _DEAR(r11); /* Pass SRR0 as arg2 */ \ @@ -474,28 +476,28 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define ALIGNMENT_EXCEPTION \ START_EXCEPTION(Alignment) \ - NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \ + NORMAL_EXCEPTION_PROLOG(0x600, ALIGNMENT); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ stw r4,_DEAR(r11); \ EXC_XFER_STD(0x0600, alignment_exception) #define PROGRAM_EXCEPTION \ START_EXCEPTION(Program) \ - NORMAL_EXCEPTION_PROLOG(PROGRAM); \ + NORMAL_EXCEPTION_PROLOG(0x700, PROGRAM); \ mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ stw r4,_ESR(r11); \ EXC_XFER_STD(0x0700, program_check_exception) #define DECREMENTER_EXCEPTION \ START_EXCEPTION(Decrementer) \ - NORMAL_EXCEPTION_PROLOG(DECREMENTER); \ + NORMAL_EXCEPTION_PROLOG(0x900, DECREMENTER); \ lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \ mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ EXC_XFER_LITE(0x0900, timer_interrupt) #define FP_UNAVAILABLE_EXCEPTION \ START_EXCEPTION(FloatingPointUnavailable) \ - NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL); \ + NORMAL_EXCEPTION_PROLOG(0x800, FP_UNAVAIL); \ beq 1f; \ bl load_up_fpu; /* if from user, just load it up */ \ b fast_exception_return; \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 72e9aa45b99b7f..bf2730b4e43ba6 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -363,7 +363,7 @@ interrupt_base: /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) - NORMAL_EXCEPTION_PROLOG(DATA_STORAGE) + NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE) mfspr r5,SPRN_ESR /* Grab the ESR, save it */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR /* Grab the DEAR, save it */ @@ -613,7 +613,7 @@ END_BTB_FLUSH_SECTION #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) - NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) + NORMAL_EXCEPTION_PROLOG(0x2010, SPE_UNAVAIL) beq 1f bl load_up_spe b fast_exception_return From bce4c26a4e324cb096a3768cdc3aad4e2552c3d0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:39 +0000 Subject: [PATCH 065/302] powerpc/32: Add a prepare_transfer_to_handler macro for exception prologs In order to increase flexibility, add a macro that will for now call transfer_to_handler. As transfer_to_handler doesn't do the actual transfer anymore, also name it prepare_transfer_to_handler. The following patches will progressively remove the use of transfer_to_handler label. 
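For context, the call sequence this macro is meant to end up in (taken from the
conversions done by the following patches in this series, and shown here only as
an illustrative sketch — every symbol used below already exists in this series)
looks roughly like:

	EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1
	prepare_transfer_to_handler	/* expands to "bl prepare_transfer_to_handler",
					 * which saves the non volatile GPRs to the
					 * exception frame and sets up r3 as the
					 * pt_regs argument */
	bl	alignment_exception	/* call the C handler */
	REST_NVGPRS(r1)			/* only for the few handlers that modify them */
	b	interrupt_return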
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7f757c52518ab1d7b27ad5113b10f860e803f467.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 3 +++ arch/powerpc/kernel/head_32.h | 4 ++++ arch/powerpc/kernel/head_booke.h | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 76e1502b3e6f73..237b753720db43 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -86,6 +86,8 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_full) .globl transfer_to_handler transfer_to_handler: + .globl prepare_transfer_to_handler +prepare_transfer_to_handler: SAVE_NVGPRS(r11) addi r3,r1,STACK_FRAME_OVERHEAD stw r2,GPR2(r11) @@ -133,6 +135,7 @@ transfer_to_handler_cont: lwz r2, GPR2(r11) b fast_exception_return #endif +_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) _ASM_NOKPROBE_SYMBOL(transfer_to_handler) _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index bf4c288173ad4f..3ab0f3ad9a6a79 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -115,6 +115,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) .endm +.macro prepare_transfer_to_handler + bl prepare_transfer_to_handler +.endm + .macro SYSCALL_ENTRY trapno mfspr r9, SPRN_SRR1 mfspr r10, SPRN_SRR0 diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 27a7358c04bb75..0f02b970e797b8 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -87,6 +87,10 @@ END_BTB_FLUSH_SECTION SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) +.macro prepare_transfer_to_handler + bl prepare_transfer_to_handler +.endm + .macro SYSCALL_ENTRY trapno intno srr1 mfspr r10, SPRN_SPRG_THREAD #ifdef CONFIG_KVM_BOOKE_HV From 8f6ff5bd9b73a7912356f378adfb85e9a4e7ce65 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:40 +0000 Subject: [PATCH 066/302] powerpc/32: Only restore non volatile registers when required Until now, non volatile registers were restored everytime they were saved, ie using EXC_XFER_STD meant saving and restoring them while EXC_XFER_LITE meant neither saving not restoring them. Now that they are always saved, EXC_XFER_STD means to restore them and EXC_XFER_LITE means to not restore them. Most of the users of EXC_XFER_STD only need to retrieve the non volatile registers. For them there is no need to restore the non volatile registers as they have not been modified. Only very few exceptions require non volatile registers restore. Opencode the few places which require saving of non volatile registers. 
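As an illustration (a sketch only — the exact conversions are in the diff below),
an exception whose handler may modify the non volatile registers, such as the
alignment exception, is opencoded as:

	EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1
	prepare_transfer_to_handler
	bl	alignment_exception
	REST_NVGPRS(r1)		/* the handler may have changed them, restore */
	b	interrupt_return

whereas handlers that do not modify them (the EXC_XFER_LITE users such as
do_page_fault) end up with the same sequence but without the REST_NVGPRS(r1)
step.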
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d1cb12d8023cc6afc1f07150565571373c04945c.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 1 - arch/powerpc/kernel/head_40x.S | 10 ++++++++-- arch/powerpc/kernel/head_8xx.S | 24 ++++++++++++++++++++---- arch/powerpc/kernel/head_book3s_32.S | 17 ++++++++++++++--- arch/powerpc/kernel/head_booke.h | 10 ++++++++-- arch/powerpc/kernel/head_fsl_booke.S | 16 ++++++++++++---- 6 files changed, 62 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 237b753720db43..e6adb6882bde55 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -358,7 +358,6 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return) .globl ret_from_except_full ret_from_except_full: - REST_NVGPRS(r1) /* fall through */ .globl ret_from_except diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 7270caff665c52..f3e5b462113f04 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -228,12 +228,18 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 - EXC_XFER_STD(0x600, alignment_exception) + prepare_transfer_to_handler + bl alignment_exception + REST_NVGPRS(r1) + b interrupt_return /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1 - EXC_XFER_STD(0x700, program_check_exception) + prepare_transfer_to_handler + bl program_check_exception + REST_NVGPRS(r1) + b interrupt_return EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD) EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c48de97f42fc13..86f844eb0e5aa0 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -131,10 +131,18 @@ instruction_counter: /* Alignment exception */ START_EXCEPTION(0x600, Alignment) EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 - EXC_XFER_STD(0x600, alignment_exception) + prepare_transfer_to_handler + bl alignment_exception + REST_NVGPRS(r1) + b interrupt_return /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + START_EXCEPTION(0x700, ProgramCheck) + EXCEPTION_PROLOG 0x700 ProgramCheck + prepare_transfer_to_handler + bl program_check_exception + REST_NVGPRS(r1) + b interrupt_return /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) @@ -149,7 +157,12 @@ instruction_counter: /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. 
*/ - EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD) + START_EXCEPTION(0x1000, SoftEmu) + EXCEPTION_PROLOG 0x1000 SoftEmu + prepare_transfer_to_handler + bl emulation_assist_interrupt + REST_NVGPRS(r1) + b interrupt_return /* * For the MPC8xx, this is a software tablewalk to load the instruction @@ -348,7 +361,10 @@ DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_PROLOG_2 0x1c00 DataBreakpoint handle_dar_dsisr=1 mfspr r4,SPRN_BAR stw r4,_DAR(r11) - EXC_XFER_STD(0x1c00, do_break) + prepare_transfer_to_handler + bl do_break + REST_NVGPRS(r1) + b interrupt_return #ifdef CONFIG_PERF_EVENTS START_EXCEPTION(0x1d00, InstructionBreakpoint) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 2b4875ceaea43e..425a4f20ceacd6 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -300,7 +300,10 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) andis. r0, r5, DSISR_DABRMATCH@h bne- 1f EXC_XFER_LITE(0x300, do_page_fault) -1: EXC_XFER_STD(0x300, do_break) +1: prepare_transfer_to_handler + bl do_break + REST_NVGPRS(r1) + b interrupt_return /* Instruction access exception. */ @@ -336,10 +339,18 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) /* Alignment exception */ START_EXCEPTION(0x600, Alignment) EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 - EXC_XFER_STD(0x600, alignment_exception) + prepare_transfer_to_handler + bl alignment_exception + REST_NVGPRS(r1) + b interrupt_return /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + START_EXCEPTION(0x700, ProgramCheck) + EXCEPTION_PROLOG 0x700 ProgramCheck + prepare_transfer_to_handler + bl program_check_exception + REST_NVGPRS(r1) + b interrupt_return /* Floating-point unavailable */ START_EXCEPTION(0x800, FPUnavailable) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 0f02b970e797b8..baf10556c5871e 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -483,14 +483,20 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) NORMAL_EXCEPTION_PROLOG(0x600, ALIGNMENT); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ stw r4,_DEAR(r11); \ - EXC_XFER_STD(0x0600, alignment_exception) + prepare_transfer_to_handler; \ + bl alignment_exception; \ + REST_NVGPRS(r1); \ + b interrupt_return #define PROGRAM_EXCEPTION \ START_EXCEPTION(Program) \ NORMAL_EXCEPTION_PROLOG(0x700, PROGRAM); \ mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ stw r4,_ESR(r11); \ - EXC_XFER_STD(0x0700, program_check_exception) + prepare_transfer_to_handler; \ + bl program_check_exception; \ + REST_NVGPRS(r1); \ + b interrupt_return #define DECREMENTER_EXCEPTION \ START_EXCEPTION(Decrementer) \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index bf2730b4e43ba6..210871b2eb41cd 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -625,12 +625,20 @@ END_BTB_FLUSH_SECTION /* SPE Floating Point Data */ #ifdef CONFIG_SPE - EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, - SPEFloatingPointException, EXC_XFER_STD) + START_EXCEPTION(SPEFloatingPointData) + NORMAL_EXCEPTION_PROLOG(0x2030, SPE_FP_DATA) + prepare_transfer_to_handler + bl SPEFloatingPointException + REST_NVGPRS(r1) + b interrupt_return /* SPE Floating Point Round */ - EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ - SPEFloatingPointRoundException, EXC_XFER_STD) + 
START_EXCEPTION(SPEFloatingPointRound) + NORMAL_EXCEPTION_PROLOG(0x2050, SPE_FP_ROUND) + prepare_transfer_to_handler + bl SPEFloatingPointRoundException + REST_NVGPRS(r1) + b interrupt_return #elif defined(CONFIG_SPE_POSSIBLE) EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, unknown_exception, EXC_XFER_STD) From 4c0104a83fc3990a76a01a2f4e504251fa9814c4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:41 +0000 Subject: [PATCH 067/302] powerpc/32: Dismantle EXC_XFER_STD/LITE/TEMPLATE In order to get more control in exception prolog, dismantle all non standard exception macros, finishing with EXC_XFER_STD and EXC_XFER_LITE and EXC_XFER_TEMPLATE. Also remove transfer_to_handler_full and ret_from_except and ret_from_except_full as they are not used anymore. Last parameter of EXCEPTION() is now ignored, will be removed in a later patch to avoid too much churn. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ca5795d04a220586b7037dbbbe6951dfa9e768eb.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 42 +----------------------- arch/powerpc/kernel/head_32.h | 21 ++++-------- arch/powerpc/kernel/head_40x.S | 33 ++++++++++++------- arch/powerpc/kernel/head_8xx.S | 12 +++++-- arch/powerpc/kernel/head_book3s_32.S | 27 ++++++++++----- arch/powerpc/kernel/head_booke.h | 49 +++++++++++++++------------- arch/powerpc/kernel/head_fsl_booke.S | 14 +++++--- 7 files changed, 92 insertions(+), 106 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e6adb6882bde55..bcf8452ebb587d 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -48,30 +48,6 @@ */ .align 12 -#ifdef CONFIG_BOOKE - .globl mcheck_transfer_to_handler -mcheck_transfer_to_handler: - /* fall through */ -_ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler) - - .globl debug_transfer_to_handler -debug_transfer_to_handler: - /* fall through */ -_ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler) - - .globl crit_transfer_to_handler -crit_transfer_to_handler: - /* fall through */ -_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) -#endif - -#ifdef CONFIG_40x - .globl crit_transfer_to_handler -crit_transfer_to_handler: - /* fall through */ -_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) -#endif - /* * This code finishes saving the registers to the exception frame * and jumps to the appropriate handler for the exception, turning @@ -79,13 +55,6 @@ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) * Note that we rely on the caller having set cr0.eq iff the exception * occurred in kernel mode (i.e. MSR:PR = 0). 
*/ - .globl transfer_to_handler_full -transfer_to_handler_full: -_ASM_NOKPROBE_SYMBOL(transfer_to_handler_full) - /* fall through */ - - .globl transfer_to_handler -transfer_to_handler: .globl prepare_transfer_to_handler prepare_transfer_to_handler: SAVE_NVGPRS(r11) @@ -136,7 +105,6 @@ transfer_to_handler_cont: b fast_exception_return #endif _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) -_ASM_NOKPROBE_SYMBOL(transfer_to_handler) _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) .globl transfer_to_syscall @@ -352,18 +320,10 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return) 3: li r10,-1 stw r10,_TRAP(r11) - bl transfer_to_handler_full + prepare_transfer_to_handler bl unrecoverable_exception trap /* should not get here */ - .globl ret_from_except_full -ret_from_except_full: - /* fall through */ - - .globl ret_from_except -ret_from_except: -_ASM_NOKPROBE_SYMBOL(ret_from_except) - .globl interrupt_return interrupt_return: lwz r4,_MSR(r1) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 3ab0f3ad9a6a79..412ede8610f7a7 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -189,20 +189,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #define EXCEPTION(n, label, hdlr, xfer) \ START_EXCEPTION(n, label) \ EXCEPTION_PROLOG n label; \ - xfer(n, hdlr) - -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ - bl tfer; \ - bl hdlr; \ - b ret - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler, \ - ret_from_except) + prepare_transfer_to_handler; \ + bl hdlr; \ + b interrupt_return .macro vmap_stack_overflow_exception __HEAD @@ -218,7 +207,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) lwz r1, emergency_ctx@l(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE EXCEPTION_PROLOG_2 0 vmap_stack_overflow - EXC_XFER_STD(0, stack_overflow_exception) + prepare_transfer_to_handler + bl stack_overflow_exception + b interrupt_return .endm #endif /* __HEAD_32_H__ */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index f3e5b462113f04..7eb49ebd6000a3 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -187,8 +187,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #define CRITICAL_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label); \ CRITICAL_EXCEPTION_PROLOG n label; \ - EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - crit_transfer_to_handler, ret_from_crit_exc) + prepare_transfer_to_handler; \ + bl hdlr; \ + b ret_from_crit_exc /* * 0x0100 - Critical Interrupt Exception @@ -209,7 +210,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) */ START_EXCEPTION(0x0300, DataStorage) EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1 - EXC_XFER_LITE(0x300, do_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return /* * 0x0400 - Instruction Storage Exception @@ -220,7 +223,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) li r5,0 stw r5, _ESR(r11) /* Zero ESR */ stw r12, _DEAR(r11) /* SRR0 as DEAR */ - EXC_XFER_LITE(0x400, do_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return /* 0x0500 - External Interrupt Exception */ EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) @@ -499,9 +504,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) /* continue normal handling for a critical exception... 
*/ 2: mfspr r4,SPRN_DBSR stw r4,_ESR(r11) /* DebugException takes DBSR in _ESR */ - EXC_XFER_TEMPLATE(DebugException, 0x2002, \ - (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - crit_transfer_to_handler, ret_from_crit_exc) + prepare_transfer_to_handler + bl DebugException + b ret_from_crit_exc /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ __HEAD @@ -509,21 +514,25 @@ Decrementer: EXCEPTION_PROLOG 0x1000 Decrementer lis r0,TSR_PIS@h mtspr SPRN_TSR,r0 /* Clear the PIT exception */ - EXC_XFER_LITE(0x1000, timer_interrupt) + prepare_transfer_to_handler + bl timer_interrupt + b interrupt_return /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ __HEAD FITException: EXCEPTION_PROLOG 0x1010 FITException - EXC_XFER_STD(0x1010, unknown_exception) + prepare_transfer_to_handler + bl unknown_exception + b interrupt_return /* Watchdog Timer (WDT) Exception. (from 0x1020) */ __HEAD WDTException: CRITICAL_EXCEPTION_PROLOG 0x1020 WDTException - EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2, - (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), - crit_transfer_to_handler, ret_from_crit_exc) + prepare_transfer_to_handler + bl WatchdogException + b ret_from_crit_exc /* Other PowerPC processors, namely those derived from the 6xx-series * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 86f844eb0e5aa0..4d73549722a1d9 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -123,7 +123,9 @@ instruction_counter: /* Machine check */ START_EXCEPTION(0x200, MachineCheck) EXCEPTION_PROLOG 0x200 MachineCheck handle_dar_dsisr=1 - EXC_XFER_STD(0x200, machine_check_exception) + prepare_transfer_to_handler + bl machine_check_exception + b interrupt_return /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) @@ -314,7 +316,9 @@ instruction_counter: .Litlbie: stw r12, _DAR(r11) stw r5, _DSISR(r11) - EXC_XFER_LITE(0x400, do_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We bail out to @@ -335,7 +339,9 @@ DARFixed:/* Return from dcbx instruction bug workaround */ beq+ .Ldtlbie tlbie r4 .Ldtlbie: - EXC_XFER_LITE(0x300, do_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return #ifdef CONFIG_VMAP_STACK vmap_stack_overflow_exception diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 425a4f20ceacd6..0a3d7d4a9ec4b1 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -271,7 +271,9 @@ __secondary_hold_acknowledge: beq cr1, 1f twi 31, 0, 0 #endif -1: EXC_XFER_STD(0x200, machine_check_exception) +1: prepare_transfer_to_handler + bl machine_check_exception + b interrupt_return /* Data access exception. */ START_EXCEPTION(0x300, DataAccess) @@ -296,12 +298,13 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) 1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 0x300 DataAccess handle_dar_dsisr=1 + prepare_transfer_to_handler lwz r5, _DSISR(r11) andis. r0, r5, DSISR_DABRMATCH@h bne- 1f - EXC_XFER_LITE(0x300, do_page_fault) -1: prepare_transfer_to_handler - bl do_break + bl do_page_fault + b interrupt_return +1: bl do_break REST_NVGPRS(r1) b interrupt_return @@ -331,7 +334,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) andis. 
r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ stw r5, _DSISR(r11) stw r12, _DAR(r11) - EXC_XFER_LITE(0x400, do_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) @@ -366,7 +371,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) beq 1f bl load_up_fpu /* if from user, just load it up */ b fast_exception_return -1: EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) +1: prepare_transfer_to_handler + bl kernel_fp_unavailable_exception + b interrupt_return #else b ProgramCheck #endif @@ -730,12 +737,16 @@ AltiVecUnavailable: bl load_up_altivec /* if from user, just load it up */ b fast_exception_return #endif /* CONFIG_ALTIVEC */ -1: EXC_XFER_LITE(0xf20, altivec_unavailable_exception) +1: prepare_transfer_to_handler + bl altivec_unavailable_exception + b interrupt_return __HEAD PerformanceMonitor: EXCEPTION_PROLOG 0xf00 PerformanceMonitor - EXC_XFER_STD(0xf00, performance_monitor_exception) + prepare_transfer_to_handler + bl performance_monitor_exception + b interrupt_return __HEAD diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index baf10556c5871e..bc69b9bf61a4a2 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -302,15 +302,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define EXCEPTION(n, intno, label, hdlr, xfer) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(n, intno); \ - xfer(n, hdlr) + prepare_transfer_to_handler; \ + bl hdlr; \ + b interrupt_return #define CRITICAL_EXCEPTION(n, intno, label, hdlr) \ START_EXCEPTION(label); \ CRITICAL_EXCEPTION_PROLOG(n, intno); \ SAVE_MMU_REGS; \ SAVE_xSRR(SRR); \ - EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - crit_transfer_to_handler, ret_from_crit_exc) + prepare_transfer_to_handler; \ + bl hdlr; \ + b ret_from_crit_exc #define MCHECK_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(label); \ @@ -321,21 +324,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) SAVE_xSRR(CSRR); \ SAVE_MMU_REGS; \ SAVE_xSRR(SRR); \ - EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - mcheck_transfer_to_handler, ret_from_mcheck_exc) - -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ - bl tfer; \ + prepare_transfer_to_handler; \ bl hdlr; \ - b ret; \ - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler, \ - ret_from_except) + b ret_from_mcheck_exc /* Check for a single step debug exception while in an exception * handler before state has been saved. 
This is to catch the case @@ -404,7 +395,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) SAVE_xSRR(CSRR); \ SAVE_MMU_REGS; \ SAVE_xSRR(SRR); \ - EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc) + prepare_transfer_to_handler; \ + bl DebugException; \ + b ret_from_debug_exc #define DEBUG_CRIT_EXCEPTION \ START_EXCEPTION(DebugCrit); \ @@ -459,7 +452,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ SAVE_MMU_REGS; \ SAVE_xSRR(SRR); \ - EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) + prepare_transfer_to_handler; \ + bl DebugException; \ + b ret_from_crit_exc #define DATA_STORAGE_EXCEPTION \ START_EXCEPTION(DataStorage) \ @@ -468,7 +463,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ stw r4, _DEAR(r11); \ - EXC_XFER_LITE(0x0300, do_page_fault) + prepare_transfer_to_handler; \ + bl do_page_fault; \ + b interrupt_return #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ @@ -476,7 +473,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ stw r12, _DEAR(r11); /* Pass SRR0 as arg2 */ \ - EXC_XFER_LITE(0x0400, do_page_fault) + prepare_transfer_to_handler; \ + bl do_page_fault; \ + b interrupt_return #define ALIGNMENT_EXCEPTION \ START_EXCEPTION(Alignment) \ @@ -503,7 +502,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) NORMAL_EXCEPTION_PROLOG(0x900, DECREMENTER); \ lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \ mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ - EXC_XFER_LITE(0x0900, timer_interrupt) + prepare_transfer_to_handler; \ + bl timer_interrupt; \ + b interrupt_return #define FP_UNAVAILABLE_EXCEPTION \ START_EXCEPTION(FloatingPointUnavailable) \ @@ -511,7 +512,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) beq 1f; \ bl load_up_fpu; /* if from user, just load it up */ \ b fast_exception_return; \ -1: EXC_XFER_STD(0x800, kernel_fp_unavailable_exception) +1: prepare_transfer_to_handler; \ + bl kernel_fp_unavailable_exception; \ + b interrupt_return #else /* __ASSEMBLY__ */ struct exception_regs { diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 210871b2eb41cd..48d022b1f508fc 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -370,9 +370,13 @@ interrupt_base: stw r4, _DEAR(r11) andis. 
r10,r5,(ESR_ILK|ESR_DLK)@h bne 1f - EXC_XFER_LITE(0x0300, do_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return 1: - EXC_XFER_LITE(0x0300, CacheLockingException) + prepare_transfer_to_handler + bl CacheLockingException + b interrupt_return /* Instruction Storage Interrupt */ INSTRUCTION_STORAGE_EXCEPTION @@ -617,7 +621,9 @@ END_BTB_FLUSH_SECTION beq 1f bl load_up_spe b fast_exception_return -1: EXC_XFER_LITE(0x2010, KernelSPE) +1: prepare_transfer_to_handler + bl KernelSPE + b interrupt_return #elif defined(CONFIG_SPE_POSSIBLE) EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ unknown_exception, EXC_XFER_STD) @@ -860,7 +866,7 @@ KernelSPE: lwz r5,_NIP(r1) bl printk #endif - b ret_from_except + b interrupt_return #ifdef CONFIG_PRINTK 87: .string "SPE used in kernel (task=%p, pc=%x) \n" #endif From acc142b6230eb2d9cec9b9e3baac1bc074df8ba3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:42 +0000 Subject: [PATCH 068/302] powerpc/32: Remove the xfer parameter in EXCEPTION() macro The xfer parameter is not used anymore, remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/17c7d68bd18f7d2f1ab24a1a20d9ed33bbcda741.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 2 +- arch/powerpc/kernel/head_40x.S | 42 ++++++++-------- arch/powerpc/kernel/head_44x.S | 10 ++-- arch/powerpc/kernel/head_8xx.S | 14 +++--- arch/powerpc/kernel/head_book3s_32.S | 72 ++++++++++++++-------------- arch/powerpc/kernel/head_booke.h | 2 +- arch/powerpc/kernel/head_fsl_booke.S | 28 +++++------ 7 files changed, 81 insertions(+), 89 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 412ede8610f7a7..84e6251622e8be 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -186,7 +186,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #endif -#define EXCEPTION(n, label, hdlr, xfer) \ +#define EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label) \ EXCEPTION_PROLOG n label; \ prepare_transfer_to_handler; \ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 7eb49ebd6000a3..52b40bf529c6d2 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -228,7 +228,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) b interrupt_return /* 0x0500 - External Interrupt Exception */ - EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, HardwareInterrupt, do_IRQ) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) @@ -246,19 +246,19 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) REST_NVGPRS(r1) b interrupt_return - EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0800, Trap_08, unknown_exception) + EXCEPTION(0x0900, Trap_09, unknown_exception) + EXCEPTION(0x0A00, Trap_0A, unknown_exception) + EXCEPTION(0x0B00, Trap_0B, unknown_exception) /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) SYSCALL_ENTRY 0xc00 /* Trap_0D is commented out to get more space for system call exception */ -/* EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD) */ - EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD) +/* EXCEPTION(0x0D00, Trap_0D, unknown_exception) */ + EXCEPTION(0x0E00, 
Trap_0E, unknown_exception) + EXCEPTION(0x0F00, Trap_0F, unknown_exception) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ START_EXCEPTION(0x1000, DecrementerTrap) @@ -433,19 +433,19 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) mfspr r10, SPRN_SPRG_SCRATCH5 b InstructionAccess - EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1300, Trap_13, unknown_exception) + EXCEPTION(0x1400, Trap_14, unknown_exception) + EXCEPTION(0x1500, Trap_15, unknown_exception) + EXCEPTION(0x1600, Trap_16, unknown_exception) + EXCEPTION(0x1700, Trap_17, unknown_exception) + EXCEPTION(0x1800, Trap_18, unknown_exception) + EXCEPTION(0x1900, Trap_19, unknown_exception) + EXCEPTION(0x1A00, Trap_1A, unknown_exception) + EXCEPTION(0x1B00, Trap_1B, unknown_exception) + EXCEPTION(0x1C00, Trap_1C, unknown_exception) + EXCEPTION(0x1D00, Trap_1D, unknown_exception) + EXCEPTION(0x1E00, Trap_1E, unknown_exception) + EXCEPTION(0x1F00, Trap_1F, unknown_exception) /* Check for a single step debug exception while in an exception * handler before state has been saved. 
This is to catch the case diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 813fa305c33bd3..5c106ac3662603 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -263,8 +263,7 @@ interrupt_base: INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ - EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \ - do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, do_IRQ) /* Alignment Interrupt */ ALIGNMENT_EXCEPTION @@ -277,7 +276,7 @@ interrupt_base: FP_UNAVAILABLE_EXCEPTION #else EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \ - FloatingPointUnavailable, unknown_exception, EXC_XFER_STD) + FloatingPointUnavailable, unknown_exception) #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) @@ -285,15 +284,14 @@ interrupt_base: /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \ - AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD) + AuxillaryProcessorUnavailable, unknown_exception) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, unknown_exception) /* Watchdog Timer Interrupt */ /* TODO: Add watchdog support */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 4d73549722a1d9..34feb628c88d29 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -118,7 +118,7 @@ instruction_counter: #endif /* System reset */ - EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD) + EXCEPTION(0x100, Reset, system_reset_exception) /* Machine check */ START_EXCEPTION(0x200, MachineCheck) @@ -128,7 +128,7 @@ instruction_counter: b interrupt_return /* External interrupt */ - EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x500, HardwareInterrupt, do_IRQ) /* Alignment exception */ START_EXCEPTION(0x600, Alignment) @@ -147,14 +147,14 @@ instruction_counter: b interrupt_return /* Decrementer */ - EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) + EXCEPTION(0x900, Decrementer, timer_interrupt) /* System call */ START_EXCEPTION(0xc00, SystemCall) SYSCALL_ENTRY 0xc00 /* Single step - not used on 601 */ - EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xd00, SingleStep, single_step_exception) /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. @@ -384,10 +384,10 @@ DARFixed:/* Return from dcbx instruction bug workaround */ mfspr r10, SPRN_SPRG_SCRATCH0 rfi #else - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1d00, Trap_1d, unknown_exception) #endif - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1e00, Trap_1e, unknown_exception) + EXCEPTION(0x1f00, Trap_1f, unknown_exception) __HEAD . = 0x2000 diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 0a3d7d4a9ec4b1..18f4ae163f34a5 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -239,7 +239,7 @@ __secondary_hold_acknowledge: /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and putting it back to what it was (unknown_async_exception) when done. 
*/ - EXCEPTION(0x100, Reset, unknown_async_exception, EXC_XFER_STD) + EXCEPTION(0x100, Reset, unknown_async_exception) /* Machine check */ /* @@ -339,7 +339,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) b interrupt_return /* External interrupt */ - EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x500, HardwareInterrupt, do_IRQ) /* Alignment exception */ START_EXCEPTION(0x600, Alignment) @@ -379,17 +379,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) #endif /* Decrementer */ - EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) + EXCEPTION(0x900, Decrementer, timer_interrupt) - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xa00, Trap_0a, unknown_exception) + EXCEPTION(0xb00, Trap_0b, unknown_exception) /* System call */ START_EXCEPTION(0xc00, SystemCall) SYSCALL_ENTRY 0xc00 - EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xd00, SingleStep, single_step_exception) + EXCEPTION(0xe00, Trap_0e, unknown_exception) /* * The Altivec unavailable trap is at 0x0f20. Foo. @@ -618,35 +618,35 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #define TAUException unknown_async_exception #endif - EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) - EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD) - EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD) - EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception) + EXCEPTION(0x1400, SMI, SMIException) + EXCEPTION(0x1500, Trap_15, unknown_exception) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception) + EXCEPTION(0x1700, Trap_17, TAUException) + EXCEPTION(0x1800, Trap_18, unknown_exception) + EXCEPTION(0x1900, Trap_19, unknown_exception) + EXCEPTION(0x1a00, Trap_1a, unknown_exception) + EXCEPTION(0x1b00, Trap_1b, 
unknown_exception) + EXCEPTION(0x1c00, Trap_1c, unknown_exception) + EXCEPTION(0x1d00, Trap_1d, unknown_exception) + EXCEPTION(0x1e00, Trap_1e, unknown_exception) + EXCEPTION(0x1f00, Trap_1f, unknown_exception) + EXCEPTION(0x2000, RunMode, RunModeException) + EXCEPTION(0x2100, Trap_21, unknown_exception) + EXCEPTION(0x2200, Trap_22, unknown_exception) + EXCEPTION(0x2300, Trap_23, unknown_exception) + EXCEPTION(0x2400, Trap_24, unknown_exception) + EXCEPTION(0x2500, Trap_25, unknown_exception) + EXCEPTION(0x2600, Trap_26, unknown_exception) + EXCEPTION(0x2700, Trap_27, unknown_exception) + EXCEPTION(0x2800, Trap_28, unknown_exception) + EXCEPTION(0x2900, Trap_29, unknown_exception) + EXCEPTION(0x2a00, Trap_2a, unknown_exception) + EXCEPTION(0x2b00, Trap_2b, unknown_exception) + EXCEPTION(0x2c00, Trap_2c, unknown_exception) + EXCEPTION(0x2d00, Trap_2d, unknown_exception) + EXCEPTION(0x2e00, Trap_2e, unknown_exception) + EXCEPTION(0x2f00, Trap_2f, unknown_exception) __HEAD . = 0x3000 diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index bc69b9bf61a4a2..fa566e89f18bd6 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -299,7 +299,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) .align 5; \ label: -#define EXCEPTION(n, intno, label, hdlr, xfer) \ +#define EXCEPTION(n, intno, label, hdlr) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(n, intno); \ prepare_transfer_to_handler; \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 48d022b1f508fc..3efc5baa801a9c 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -382,7 +382,7 @@ interrupt_base: INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ - EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ) /* Alignment Interrupt */ ALIGNMENT_EXCEPTION @@ -394,8 +394,7 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, unknown_exception) #endif /* System Call Interrupt */ @@ -403,16 +402,14 @@ interrupt_base: SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1 /* Auxiliary Processor Unavailable Interrupt */ - EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, unknown_exception) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x3100, FIT, FixedIntervalTimer, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x3100, FIT, FixedIntervalTimer, unknown_exception) /* Watchdog Timer Interrupt */ #ifdef CONFIG_BOOKE_WDT @@ -625,8 +622,7 @@ END_BTB_FLUSH_SECTION bl KernelSPE b interrupt_return #elif defined(CONFIG_SPE_POSSIBLE) - EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, unknown_exception) #endif /* CONFIG_SPE_POSSIBLE */ /* SPE Floating Point Data */ @@ -646,18 +642,16 @@ END_BTB_FLUSH_SECTION REST_NVGPRS(r1) b interrupt_return #elif defined(CONFIG_SPE_POSSIBLE) - EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, - unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2040, SPE_FP_DATA, 
SPEFloatingPointData, unknown_exception) + EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, unknown_exception) #endif /* CONFIG_SPE_POSSIBLE */ /* Performance Monitor */ EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \ - performance_monitor_exception, EXC_XFER_STD) + performance_monitor_exception) - EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD) + EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception) CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \ CriticalDoorbell, unknown_exception) @@ -672,10 +666,10 @@ END_BTB_FLUSH_SECTION unknown_exception) /* Hypercall */ - EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD) + EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception) /* Embedded Hypervisor Privilege */ - EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD) + EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception) interrupt_end: From a305597850c96e2f2d349533cf3b514fa4b7b9f8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:43 +0000 Subject: [PATCH 069/302] powerpc/32: Refactor saving of volatile registers in exception prologs Exception prologs all do the same at the end: - Save trapno in stack - Mark stack with exception marker - Save r0 - Save r3 to r8 Refactor that into a COMMON_EXCEPTION_PROLOG_END macro. At the same time use r1 instead of r11. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e1c45d2e895e0693c42d2a6840df1105a148efea.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 16 ++++++++++------ arch/powerpc/kernel/head_40x.S | 9 +-------- arch/powerpc/kernel/head_booke.h | 26 +++++++++++++------------- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 84e6251622e8be..ba20bfabdf63b8 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -104,15 +104,19 @@ li r10, MSR_KERNEL /* can take exceptions */ mtmsr r10 /* (except for mach check in rtas) */ #endif - stw r0,GPR0(r11) + COMMON_EXCEPTION_PROLOG_END \trapno +_ASM_NOKPROBE_SYMBOL(\name\()_virt) +.endm + +.macro COMMON_EXCEPTION_PROLOG_END trapno + stw r0,GPR0(r1) lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addi r10,r10,STACK_FRAME_REGS_MARKER@l - stw r10,8(r11) + stw r10,8(r1) li r10, \trapno - stw r10,_TRAP(r11) - SAVE_4GPRS(3, r11) - SAVE_2GPRS(7, r11) -_ASM_NOKPROBE_SYMBOL(\name\()_virt) + stw r10,_TRAP(r1) + SAVE_4GPRS(3, r1) + SAVE_2GPRS(7, r1) .endm .macro prepare_transfer_to_handler diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 52b40bf529c6d2..e1360b88b6cb86 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -157,14 +157,7 @@ _ENTRY(crit_esr) mfspr r12,SPRN_SRR2 mfspr r9,SPRN_SRR3 rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ - stw r0,GPR0(r11) - lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ - addi r10, r10, STACK_FRAME_REGS_MARKER@l - stw r10, 8(r11) - li r10, \trapno + 2 - stw r10,_TRAP(r11) - SAVE_4GPRS(3, r11) - SAVE_2GPRS(7, r11) + COMMON_EXCEPTION_PROLOG_END \trapno + 2 _ASM_NOKPROBE_SYMBOL(\name\()_virt) .endm diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index fa566e89f18bd6..4d583fbef0b6f3 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -78,14 +78,18 @@ END_BTB_FLUSH_SECTION stw r1, 0(r11); \ mr r1, r11; \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ - stw r0,GPR0(r11); \ - lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \ - addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ - stw r10, 8(r11); \ - li r10, trapno; \ - stw r10,_TRAP(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) + COMMON_EXCEPTION_PROLOG_END trapno + +.macro COMMON_EXCEPTION_PROLOG_END trapno + stw r0,GPR0(r1) + lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + addi r10, r10, STACK_FRAME_REGS_MARKER@l + stw r10, 8(r1) + li r10, \trapno + stw r10,_TRAP(r1) + SAVE_4GPRS(3, r1) + SAVE_2GPRS(7, r1) +.endm .macro prepare_transfer_to_handler bl prepare_transfer_to_handler @@ -231,11 +235,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r1,0(r11); \ mr r1,r11; \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ - li r10, trapno; \ - stw r10,_TRAP(r11); \ - stw r0,GPR0(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) + COMMON_EXCEPTION_PROLOG_END trapno #define SAVE_xSRR(xSRR) \ mfspr r0,SPRN_##xSRR##0; \ From 16db54369df614bf386aa31a6730c5bdb1bf4ffd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:44 +0000 Subject: [PATCH 070/302] powerpc/32: Save remaining registers in exception prolog Save non volatile registers, XER, CTR, MSR and NIP in exception prolog. Also assign proper value to r2 and r3 there. For now, recalculate thread pointer in prepare_transfer_to_handler. It will disappear once KUAP is ported to C. And remove the comment which is now completely wrong. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/56f0cde9dd0362edf2ddba4d887552013eee7329.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 27 ++++----------------------- arch/powerpc/kernel/head_32.h | 12 ++++++++++++ arch/powerpc/kernel/head_booke.h | 12 ++++++++++++ 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index bcf8452ebb587d..46b3854e73189c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -48,29 +48,11 @@ */ .align 12 -/* - * This code finishes saving the registers to the exception frame - * and jumps to the appropriate handler for the exception, turning - * on address translation. - * Note that we rely on the caller having set cr0.eq iff the exception - * occurred in kernel mode (i.e. MSR:PR = 0). - */ .globl prepare_transfer_to_handler prepare_transfer_to_handler: - SAVE_NVGPRS(r11) - addi r3,r1,STACK_FRAME_OVERHEAD - stw r2,GPR2(r11) - stw r12,_NIP(r11) - stw r9,_MSR(r11) - andi. r2,r9,MSR_PR - mfctr r12 - mfspr r2,SPRN_XER - stw r12,_CTR(r11) - stw r2,_XER(r11) - mfspr r12,SPRN_SPRG_THREAD - tovirt(r12, r12) + andi. 
r12,r9,MSR_PR + addi r12,r2,THREAD beq 2f - addi r2, r12, -THREAD #ifdef CONFIG_PPC_BOOK3S_32 kuep_lock r11, r12 #endif @@ -78,8 +60,7 @@ prepare_transfer_to_handler: /* if from kernel, check interrupted DOZE/NAP mode */ 2: - kuap_save_and_lock r11, r12, r9, r2, r6 - addi r2, r12, -THREAD + kuap_save_and_lock r11, r12, r9, r5, r6 #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 @@ -293,7 +274,7 @@ fast_exception_return: 2: REST_4GPRS(3, r11) lwz r10,_CCR(r11) - REST_GPR(1, r11) + REST_2GPRS(1, r11) mtcr r10 lwz r10,_LINK(r11) mtlr r10 diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index ba20bfabdf63b8..80e45c800496c3 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -117,6 +117,18 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) stw r10,_TRAP(r1) SAVE_4GPRS(3, r1) SAVE_2GPRS(7, r1) + SAVE_NVGPRS(r1) + stw r2,GPR2(r1) + stw r12,_NIP(r1) + stw r9,_MSR(r1) + mfctr r10 + mfspr r2,SPRN_SPRG_THREAD + stw r10,_CTR(r1) + tovirt(r2, r2) + mfspr r10,SPRN_XER + addi r2, r2, -THREAD + stw r10,_XER(r1) + addi r3,r1,STACK_FRAME_OVERHEAD .endm .macro prepare_transfer_to_handler diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 4d583fbef0b6f3..7f3dd5fae51d3f 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -89,6 +89,18 @@ END_BTB_FLUSH_SECTION stw r10,_TRAP(r1) SAVE_4GPRS(3, r1) SAVE_2GPRS(7, r1) + SAVE_NVGPRS(r1) + stw r2,GPR2(r1) + stw r12,_NIP(r1) + stw r9,_MSR(r1) + mfctr r10 + mfspr r2,SPRN_SPRG_THREAD + stw r10,_CTR(r1) + tovirt(r2, r2) + mfspr r10,SPRN_XER + addi r2, r2, -THREAD + stw r10,_XER(r1) + addi r3,r1,STACK_FRAME_OVERHEAD .endm .macro prepare_transfer_to_handler From a5d33be0512b4565808a3aed05567cb56c0e6ad0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:45 +0000 Subject: [PATCH 071/302] powerpc/32: Return directly from power_save_ppc32_restore() transfer_to_handler_cont: is now just a blr. Directly perform blr in power_save_ppc32_restore(). Also remove useless setting of r11 in e500 version of power_save_ppc32_restore(). 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e337506e08a4df95b11d2290104b92f0dcdb5548.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 3 --- arch/powerpc/kernel/idle_6xx.S | 2 +- arch/powerpc/kernel/idle_e500.S | 10 +--------- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 46b3854e73189c..b9a2935efec1f8 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -67,8 +67,6 @@ prepare_transfer_to_handler: bt- 31-TLF_NAPPING,4f bt- 31-TLF_SLEEPING,7f #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ - .globl transfer_to_handler_cont -transfer_to_handler_cont: blr #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) @@ -86,7 +84,6 @@ transfer_to_handler_cont: b fast_exception_return #endif _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) -_ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) .globl transfer_to_syscall transfer_to_syscall: diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 153366e178c4b5..13cad9297d8222 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -176,7 +176,7 @@ BEGIN_FTR_SECTION lwz r9,nap_save_hid1@l(r9) mtspr SPRN_HID1, r9 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) - b transfer_to_handler_cont + blr _ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) .data diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 7795727e7f08e2..9e1bc4502c507d 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -81,13 +81,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) _GLOBAL(power_save_ppc32_restore) lwz r9,_LINK(r11) /* interrupted in e500_idle */ stw r9,_NIP(r11) /* make it do a blr */ - -#ifdef CONFIG_SMP - lwz r11,TASK_CPU(r2) /* get cpu number * 4 */ - slwi r11,r11,2 -#else - li r11,0 -#endif - - b transfer_to_handler_cont + blr _ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) From a2b3e09ae41c71d27d9b8da9baf31e0d9a97b864 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:46 +0000 Subject: [PATCH 072/302] powerpc/32: Only use prepare_transfer_to_handler function on book3s/32 and e500 Only book3s/32 and e500 have significative work to do in prepare_transfer_to_handler. Other 32 bit have nothing to do at all. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b5e29ca0e557c11340415a13fe8b107189d315e1.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 6 ++---- arch/powerpc/kernel/head_32.h | 2 ++ arch/powerpc/kernel/head_booke.h | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index b9a2935efec1f8..44d0eddf873839 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -48,6 +48,7 @@ */ .align 12 +#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) .globl prepare_transfer_to_handler prepare_transfer_to_handler: andi. 
r12,r9,MSR_PR @@ -61,15 +62,12 @@ prepare_transfer_to_handler: /* if from kernel, check interrupted DOZE/NAP mode */ 2: kuap_save_and_lock r11, r12, r9, r5, r6 -#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 bt- 31-TLF_NAPPING,4f bt- 31-TLF_SLEEPING,7f -#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ blr -#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING stw r12,TI_LOCAL_FLAGS(r2) b power_save_ppc32_restore @@ -82,8 +80,8 @@ prepare_transfer_to_handler: kuap_restore r11, r2, r3, r4, r5 lwz r2, GPR2(r11) b fast_exception_return -#endif _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) +#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ .globl transfer_to_syscall transfer_to_syscall: diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 80e45c800496c3..c018fcdf915741 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -132,7 +132,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) .endm .macro prepare_transfer_to_handler +#ifdef CONFIG_PPC_BOOK3S_32 bl prepare_transfer_to_handler +#endif .endm .macro SYSCALL_ENTRY trapno diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 7f3dd5fae51d3f..cb96ae002af6d9 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -104,7 +104,9 @@ END_BTB_FLUSH_SECTION .endm .macro prepare_transfer_to_handler +#ifdef CONFIG_E500 bl prepare_transfer_to_handler +#endif .endm .macro SYSCALL_ENTRY trapno intno srr1 From b5efec00b671c5d7e9cb9e73a1d4925dd6ce8dcd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:47 +0000 Subject: [PATCH 073/302] powerpc/32s: Move KUEP locking/unlocking in C This can be done in C, do it. Unrolling the loop gains approx. 15% performance. From now on, prepare_transfer_to_handler() is only for interrupts from kernel. 
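For reference, the C replacement walks the 16 user segment registers and
flips SR_NX, stepping the VSID by 0x111 for each 256M segment just like the
removed assembly did. Below is a simplified, non-unrolled sketch of that
idea; kuep_update_sketch() is a hypothetical name used only for
illustration, the real (unrolled) kuep_update() is in the new kuep.c
further down:

	/* Illustrative sketch only: the real kuep_update() is unrolled
	 * two segments at a time for speed.
	 */
	static void kuep_update_sketch(u32 first_sr_val)
	{
		u32 addr;
		u32 val = first_sr_val;

		for (addr = 0; addr < TASK_SIZE; addr += 0x10000000) {
			mtsr(val, addr);
			val = (val + 0x111) & 0xf0ffffff; /* next VSID, clear overflow */
		}
	}

kuep_lock() then feeds it mfsr(0) | SR_NX and kuep_unlock() feeds it
mfsr(0) & ~SR_NX.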
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4eadd873927e9a73c3d1dfe2f9497353465514cf.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 31 ------------------ arch/powerpc/include/asm/interrupt.h | 6 +++- arch/powerpc/include/asm/kup.h | 8 +++++ arch/powerpc/kernel/entry_32.S | 16 ---------- arch/powerpc/kernel/head_32.h | 3 ++ arch/powerpc/kernel/head_booke.h | 3 ++ arch/powerpc/kernel/interrupt.c | 4 +++ arch/powerpc/mm/book3s32/Makefile | 1 + arch/powerpc/mm/book3s32/kuep.c | 40 ++++++++++++++++++++++++ 9 files changed, 64 insertions(+), 48 deletions(-) create mode 100644 arch/powerpc/mm/book3s32/kuep.c diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 73bc5d2c431dfa..b97ea60f6fa302 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -7,37 +7,6 @@ #ifdef __ASSEMBLY__ -.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */ -101: mtsrin \gpr1, \gpr2 - addi \gpr1, \gpr1, 0x111 /* next VSID */ - rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ - addis \gpr2, \gpr2, 0x1000 /* address of next segment */ - bdnz 101b - isync -.endm - -.macro kuep_lock gpr1, gpr2 -#ifdef CONFIG_PPC_KUEP - li \gpr1, NUM_USER_SEGMENTS - li \gpr2, 0 - mtctr \gpr1 - mfsrin \gpr1, \gpr2 - oris \gpr1, \gpr1, SR_NX@h /* set Nx */ - kuep_update_sr \gpr1, \gpr2 -#endif -.endm - -.macro kuep_unlock gpr1, gpr2 -#ifdef CONFIG_PPC_KUEP - li \gpr1, NUM_USER_SEGMENTS - li \gpr2, 0 - mtctr \gpr1 - mfsrin \gpr1, \gpr2 - rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */ - kuep_update_sr \gpr1, \gpr2 -#endif -.endm - #ifdef CONFIG_PPC_KUAP .macro kuap_update_sr gpr1, gpr2, gpr3 /* NEVER use r0 as gpr2 due to addis */ diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 861e6eadc98c41..857375309255a8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -33,8 +33,10 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup if (!arch_irq_disabled_regs(regs)) trace_hardirqs_off(); - if (user_mode(regs)) + if (user_mode(regs)) { + kuep_lock(); account_cpu_user_entry(); + } #endif /* * Book3E reconciles irq soft mask in asm @@ -89,6 +91,8 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt exception_exit(state->ctx_state); #endif + if (user_mode(regs)) + kuep_unlock(); /* * Book3S exits to user via interrupt_exit_user_prepare(), which does * context tracking, which is a cleaner way to handle PREEMPT=y diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 7ec21af49a45c5..25671f711ec2b0 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -55,6 +55,14 @@ void setup_kuep(bool disabled); static inline void setup_kuep(bool disabled) { } #endif /* CONFIG_PPC_KUEP */ +#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) +void kuep_lock(void); +void kuep_unlock(void); +#else +static inline void kuep_lock(void) { } +static inline void kuep_unlock(void) { } +#endif + #ifdef CONFIG_PPC_KUAP void setup_kuap(bool disabled); #else diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 44d0eddf873839..112d6247c391bc 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -51,16 +51,9 @@ #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) .globl 
prepare_transfer_to_handler prepare_transfer_to_handler: - andi. r12,r9,MSR_PR addi r12,r2,THREAD - beq 2f -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_lock r11, r12 -#endif - blr /* if from kernel, check interrupted DOZE/NAP mode */ -2: kuap_save_and_lock r11, r12, r9, r5, r6 lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 @@ -86,9 +79,6 @@ _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) .globl transfer_to_syscall transfer_to_syscall: SAVE_NVGPRS(r1) -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_lock r11, r12 -#endif /* Calling convention has r9 = orig r0, r10 = regs */ addi r10,r1,STACK_FRAME_OVERHEAD @@ -105,9 +95,6 @@ ret_from_syscall: cmplwi cr0,r5,0 bne- 2f #endif /* CONFIG_PPC_47x */ -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_unlock r5, r7 -#endif kuap_check r2, r4 lwz r4,_LINK(r1) lwz r5,_CCR(r1) @@ -311,9 +298,6 @@ interrupt_return: bne- .Lrestore_nvgprs .Lfast_user_interrupt_return: -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_unlock r10, r11 -#endif kuap_check r2, r4 lwz r11,_NIP(r1) lwz r12,_MSR(r1) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index c018fcdf915741..a8221ddcbd66da 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -133,7 +133,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) .macro prepare_transfer_to_handler #ifdef CONFIG_PPC_BOOK3S_32 + andi. r12,r9,MSR_PR + bne 777f bl prepare_transfer_to_handler +777: #endif .endm diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index cb96ae002af6d9..f8247009169703 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -105,7 +105,10 @@ END_BTB_FLUSH_SECTION .macro prepare_transfer_to_handler #ifdef CONFIG_E500 + andi. r12,r9,MSR_PR + bne 777f bl prepare_transfer_to_handler +777: #endif .endm diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 6875b82f613a1f..20ace874cd98df 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -33,6 +33,8 @@ notrace long system_call_exception(long r3, long r4, long r5, { syscall_fn f; + kuep_lock(); + regs->orig_gpr3 = r3; if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) @@ -354,6 +356,8 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, */ kuap_user_restore(regs); #endif + kuep_unlock(); + return ret; } diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 446d9de88ce4c1..7f0c8a78ba0c08 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -9,3 +9,4 @@ endif obj-y += mmu.o mmu_context.o obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o +obj-$(CONFIG_PPC_KUEP) += kuep.o diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c new file mode 100644 index 00000000000000..8ed1b863483973 --- /dev/null +++ b/arch/powerpc/mm/book3s32/kuep.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include + +#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do { \ + if (TASK_SIZE > ((n) << 28)) \ + mtsr(val1, (n) << 28); \ + if (TASK_SIZE > (((n) + 1) << 28)) \ + mtsr(val2, ((n) + 1) << 28); \ + val1 = (val1 + 0x222) & 0xf0ffffff; \ + val2 = (val2 + 0x222) & 0xf0ffffff; \ +} while (0) + +static __always_inline void kuep_update(u32 val) +{ + int val1 = val; + int val2 = (val + 0x111) & 0xf0ffffff; + + KUEP_UPDATE_TWO_USER_SEGMENTS(0); + KUEP_UPDATE_TWO_USER_SEGMENTS(2); + KUEP_UPDATE_TWO_USER_SEGMENTS(4); + KUEP_UPDATE_TWO_USER_SEGMENTS(6); + KUEP_UPDATE_TWO_USER_SEGMENTS(8); + 
KUEP_UPDATE_TWO_USER_SEGMENTS(10); + KUEP_UPDATE_TWO_USER_SEGMENTS(12); + KUEP_UPDATE_TWO_USER_SEGMENTS(14); +} + +void kuep_lock(void) +{ + kuep_update(mfsr(0) | SR_NX); +} + +void kuep_unlock(void) +{ + kuep_update(mfsr(0) & ~SR_NX); +} From ad2d2344771dabc5f0f14d85d5e7d2ddc613f385 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:48 +0000 Subject: [PATCH 074/302] powerpc/64s: Make kuap_check_amr() and kuap_get_and_check_amr() generic In preparation of porting powerpc32 to C syscall entry/exit, rename kuap_check_amr() and kuap_get_and_check_amr() as kuap_assert_locked() and kuap_get_and_assert_locked(), and move in the generic asm/kup.h the stub for when CONFIG_PPC_KUAP is not selected. Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f82614d9b17b83abd739aa18fc08811815d0c2e3.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/64/kup.h | 24 ++---------------------- arch/powerpc/include/asm/kup.h | 10 +++++++++- arch/powerpc/kernel/interrupt.c | 12 ++++++------ arch/powerpc/kernel/irq.c | 2 +- 4 files changed, 18 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 8bd90505089624..9700da3a40933b 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -287,7 +287,7 @@ static inline void kuap_kernel_restore(struct pt_regs *regs, */ } -static inline unsigned long kuap_get_and_check_amr(void) +static inline unsigned long kuap_get_and_assert_locked(void) { if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { unsigned long amr = mfspr(SPRN_AMR); @@ -298,27 +298,7 @@ static inline unsigned long kuap_get_and_check_amr(void) return 0; } -#else /* CONFIG_PPC_PKEY */ - -static inline void kuap_user_restore(struct pt_regs *regs) -{ -} - -static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) -{ -} - -static inline unsigned long kuap_get_and_check_amr(void) -{ - return 0; -} - -#endif /* CONFIG_PPC_PKEY */ - - -#ifdef CONFIG_PPC_KUAP - -static inline void kuap_check_amr(void) +static inline void kuap_assert_locked(void) { if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 25671f711ec2b0..aa5f7745931186 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -74,7 +74,15 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) return false; } -static inline void kuap_check_amr(void) { } +static inline void kuap_assert_locked(void) { } +static inline void kuap_save_and_lock(struct pt_regs *regs) { } +static inline void kuap_user_restore(struct pt_regs *regs) { } +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { } + +static inline unsigned long kuap_get_and_assert_locked(void) +{ + return 0; +} /* * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 20ace874cd98df..a7cb511bf94531 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -76,7 +76,7 @@ notrace long system_call_exception(long r3, long r4, long r5, } else #endif #ifdef CONFIG_PPC64 - kuap_check_amr(); + kuap_assert_locked(); #endif booke_restore_dbcr0(); @@ -254,7 +254,7 @@ notrace unsigned long 
syscall_exit_prepare(unsigned long r3, CT_WARN_ON(ct_state() == CONTEXT_USER); #ifdef CONFIG_PPC64 - kuap_check_amr(); + kuap_assert_locked(); #endif regs->result = r3; @@ -380,7 +380,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned * AMR can only have been unlocked if we interrupted the kernel. */ #ifdef CONFIG_PPC64 - kuap_check_amr(); + kuap_assert_locked(); #endif local_irq_save(flags); @@ -451,7 +451,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign unsigned long flags; unsigned long ret = 0; #ifdef CONFIG_PPC64 - unsigned long amr; + unsigned long kuap; #endif if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && @@ -467,7 +467,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign CT_WARN_ON(ct_state() == CONTEXT_USER); #ifdef CONFIG_PPC64 - amr = kuap_get_and_check_amr(); + kuap = kuap_get_and_assert_locked(); #endif if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) { @@ -511,7 +511,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign * value from the check above. */ #ifdef CONFIG_PPC64 - kuap_kernel_restore(regs, amr); + kuap_kernel_restore(regs, kuap); #endif return ret; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index d71fd10a1dd46b..5b72abbff96c5b 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -282,7 +282,7 @@ static inline void replay_soft_interrupts_irqrestore(void) * and re-locking AMR but we shouldn't get here in the first place, * hence the warning. */ - kuap_check_amr(); + kuap_assert_locked(); if (kuap_state != AMR_KUAP_BLOCKED) set_kuap(AMR_KUAP_BLOCKED); From 21eb58ae4fce559d4e025df042db2bc0bb100f93 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:49 +0000 Subject: [PATCH 075/302] powerpc/32s: Create C version of kuap save/restore/check helpers In preparation of porting PPC32 to C syscall entry/exit, create C version of kuap_save_and_lock() and kuap_user_restore() and kuap_kernel_restore() and kuap_assert_locked() and kuap_get_and_assert_locked() on book3s/32. 
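For illustration, a later patch in this series ("powerpc/32: Manage KUAP
in C") ends up pairing these helpers around interrupts taken from kernel
mode roughly as follows. This is a simplified sketch using a hypothetical
wrapper function, not the literal call sites:

	/* Sketch of the intended pairing, for illustration only */
	static void sketch_interrupt_from_kernel(struct pt_regs *regs)
	{
		unsigned long kuap;

		kuap_save_and_lock(regs);		/* save thread.kuap in regs->kuap, set Ks */

		/* ... handle the interrupt ... */

		kuap = kuap_get_and_assert_locked();	/* should still be locked here */
		kuap_kernel_restore(regs, kuap);	/* reopen whatever was open before */
	}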
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2be8fb729da4a0f9863b25e1b9d547174fcd5056.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 45 ++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index b97ea60f6fa302..46599bbc45256e 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -72,6 +72,51 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) isync(); /* Context sync required after mtsr() */ } +static inline void kuap_save_and_lock(struct pt_regs *regs) +{ + unsigned long kuap = current->thread.kuap; + u32 addr = kuap & 0xf0000000; + u32 end = kuap << 28; + + regs->kuap = kuap; + if (unlikely(!kuap)) + return; + + current->thread.kuap = 0; + kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* Set Ks */ +} + +static inline void kuap_user_restore(struct pt_regs *regs) +{ +} + +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +{ + u32 addr = regs->kuap & 0xf0000000; + u32 end = regs->kuap << 28; + + current->thread.kuap = regs->kuap; + + if (unlikely(regs->kuap == kuap)) + return; + + kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */ +} + +static inline unsigned long kuap_get_and_assert_locked(void) +{ + unsigned long kuap = current->thread.kuap; + + WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0); + + return kuap; +} + +static inline void kuap_assert_locked(void) +{ + kuap_get_and_assert_locked(); +} + static __always_inline void allow_user_access(void __user *to, const void __user *from, u32 size, unsigned long dir) { From 0b45359aa2df7b761817a9664cfb53ea3070c390 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:50 +0000 Subject: [PATCH 076/302] powerpc/8xx: Create C version of kuap save/restore/check helpers In preparation of porting PPC32 to C syscall entry/exit, create C version of kuap_save_and_lock() and kuap_user_restore() and kuap_kernel_restore() and kuap_assert_locked() and kuap_get_and_assert_locked() on 8xx. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/156a7c4b669d26785391422a5581a1d919544c9a.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/kup-8xx.h | 31 ++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 17a4a616436ffa..e6b5ebca47e58c 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -34,6 +34,37 @@ #include +static inline void kuap_save_and_lock(struct pt_regs *regs) +{ + regs->kuap = mfspr(SPRN_MD_AP); + mtspr(SPRN_MD_AP, MD_APG_KUAP); +} + +static inline void kuap_user_restore(struct pt_regs *regs) +{ +} + +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +{ + mtspr(SPRN_MD_AP, regs->kuap); +} + +static inline unsigned long kuap_get_and_assert_locked(void) +{ + unsigned long kuap = mfspr(SPRN_MD_AP); + + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16); + + return kuap; +} + +static inline void kuap_assert_locked(void) +{ + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + kuap_get_and_assert_locked(); +} + static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { From c16728835eec45fa82f4744a52940717ac828f6d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:51 +0000 Subject: [PATCH 077/302] powerpc/32: Manage KUAP in C Move all KUAP management in C. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/199365ddb58d579daf724815f2d0acb91cc49d19.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 50 +------------------- arch/powerpc/include/asm/interrupt.h | 2 + arch/powerpc/include/asm/kup.h | 9 ---- arch/powerpc/include/asm/nohash/32/kup-8xx.h | 25 +--------- arch/powerpc/kernel/entry_32.S | 8 ---- arch/powerpc/kernel/interrupt.c | 19 ++------ arch/powerpc/kernel/process.c | 3 ++ 7 files changed, 11 insertions(+), 105 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 46599bbc45256e..1670dfe9d4f10b 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -5,55 +5,7 @@ #include #include -#ifdef __ASSEMBLY__ - -#ifdef CONFIG_PPC_KUAP - -.macro kuap_update_sr gpr1, gpr2, gpr3 /* NEVER use r0 as gpr2 due to addis */ -101: mtsrin \gpr1, \gpr2 - addi \gpr1, \gpr1, 0x111 /* next VSID */ - rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ - addis \gpr2, \gpr2, 0x1000 /* address of next segment */ - cmplw \gpr2, \gpr3 - blt- 101b - isync -.endm - -.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 - lwz \gpr2, KUAP(\thread) - rlwinm. \gpr3, \gpr2, 28, 0xf0000000 - stw \gpr2, STACK_REGS_KUAP(\sp) - beq+ 102f - li \gpr1, 0 - stw \gpr1, KUAP(\thread) - mfsrin \gpr1, \gpr2 - oris \gpr1, \gpr1, SR_KS@h /* set Ks */ - kuap_update_sr \gpr1, \gpr2, \gpr3 -102: -.endm - -.macro kuap_restore sp, current, gpr1, gpr2, gpr3 - lwz \gpr2, STACK_REGS_KUAP(\sp) - rlwinm. 
\gpr3, \gpr2, 28, 0xf0000000 - stw \gpr2, THREAD + KUAP(\current) - beq+ 102f - mfsrin \gpr1, \gpr2 - rlwinm \gpr1, \gpr1, 0, ~SR_KS /* Clear Ks */ - kuap_update_sr \gpr1, \gpr2, \gpr3 -102: -.endm - -.macro kuap_check current, gpr -#ifdef CONFIG_PPC_KUAP_DEBUG - lwz \gpr, THREAD + KUAP(\current) -999: twnei \gpr, 0 - EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) -#endif -.endm - -#endif /* CONFIG_PPC_KUAP */ - -#else /* !__ASSEMBLY__ */ +#ifndef __ASSEMBLY__ #ifdef CONFIG_PPC_KUAP diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 857375309255a8..7c633896d758cd 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -36,6 +36,8 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup if (user_mode(regs)) { kuep_lock(); account_cpu_user_entry(); + } else { + kuap_save_and_lock(regs); } #endif /* diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index aa5f7745931186..ec96232529ac27 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -28,15 +28,6 @@ #ifdef __ASSEMBLY__ #ifndef CONFIG_PPC_KUAP -.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 -.endm - -.macro kuap_restore sp, current, gpr1, gpr2, gpr3 -.endm - -.macro kuap_check current, gpr -.endm - .macro kuap_check_amr gpr1, gpr2 .endm diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index e6b5ebca47e58c..295ef563960932 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -7,30 +7,7 @@ #ifdef CONFIG_PPC_KUAP -#ifdef __ASSEMBLY__ - -.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 - lis \gpr2, MD_APG_KUAP@h /* only APG0 and APG1 are used */ - mfspr \gpr1, SPRN_MD_AP - mtspr SPRN_MD_AP, \gpr2 - stw \gpr1, STACK_REGS_KUAP(\sp) -.endm - -.macro kuap_restore sp, current, gpr1, gpr2, gpr3 - lwz \gpr1, STACK_REGS_KUAP(\sp) - mtspr SPRN_MD_AP, \gpr1 -.endm - -.macro kuap_check current, gpr -#ifdef CONFIG_PPC_KUAP_DEBUG - mfspr \gpr, SPRN_MD_AP - rlwinm \gpr, \gpr, 16, 0xffff -999: twnei \gpr, MD_APG_KUAP@h - EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) -#endif -.endm - -#else /* !__ASSEMBLY__ */ +#ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 112d6247c391bc..9160285cb2f444 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -51,10 +51,7 @@ #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) .globl prepare_transfer_to_handler prepare_transfer_to_handler: - addi r12,r2,THREAD - /* if from kernel, check interrupted DOZE/NAP mode */ - kuap_save_and_lock r11, r12, r9, r5, r6 lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 bt- 31-TLF_NAPPING,4f @@ -70,7 +67,6 @@ prepare_transfer_to_handler: lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ - kuap_restore r11, r2, r3, r4, r5 lwz r2, GPR2(r11) b fast_exception_return _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) @@ -95,7 +91,6 @@ ret_from_syscall: cmplwi cr0,r5,0 bne- 2f #endif /* CONFIG_PPC_47x */ - kuap_check r2, r4 lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 @@ -207,7 +202,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) stw r10,_CCR(r1) stw r1,KSP(r3) /* Set old stack pointer */ - kuap_check r2, r0 #ifdef CONFIG_SMP /* We need a sync somewhere here to make sure that if the * previous task 
gets rescheduled on another CPU, it sees all @@ -298,7 +292,6 @@ interrupt_return: bne- .Lrestore_nvgprs .Lfast_user_interrupt_return: - kuap_check r2, r4 lwz r11,_NIP(r1) lwz r12,_MSR(r1) mtspr SPRN_SRR0,r11 @@ -347,7 +340,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) .Lfast_kernel_interrupt_return: cmpwi cr1,r3,0 - kuap_restore r1, r2, r3, r4, r5 lwz r11,_NIP(r1) lwz r12,_MSR(r1) mtspr SPRN_SRR0,r11 diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index a7cb511bf94531..c4dd4b8f9cfa5a 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -34,6 +34,9 @@ notrace long system_call_exception(long r3, long r4, long r5, syscall_fn f; kuep_lock(); +#ifdef CONFIG_PPC32 + kuap_save_and_lock(regs); +#endif regs->orig_gpr3 = r3; @@ -75,9 +78,7 @@ notrace long system_call_exception(long r3, long r4, long r5, isync(); } else #endif -#ifdef CONFIG_PPC64 kuap_assert_locked(); -#endif booke_restore_dbcr0(); @@ -253,9 +254,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, CT_WARN_ON(ct_state() == CONTEXT_USER); -#ifdef CONFIG_PPC64 kuap_assert_locked(); -#endif regs->result = r3; @@ -350,7 +349,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, account_cpu_user_exit(); -#ifdef CONFIG_PPC_BOOK3S_64 /* BOOK3E and ppc32 not using this */ +#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not using this */ /* * We do this at the end so that we do context switch with KERNEL AMR */ @@ -379,9 +378,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned * We don't need to restore AMR on the way back to userspace for KUAP. * AMR can only have been unlocked if we interrupted the kernel. */ -#ifdef CONFIG_PPC64 kuap_assert_locked(); -#endif local_irq_save(flags); @@ -438,9 +435,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned /* * We do this at the end so that we do context switch with KERNEL AMR */ -#ifdef CONFIG_PPC64 kuap_user_restore(regs); -#endif return ret; } @@ -450,9 +445,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign { unsigned long flags; unsigned long ret = 0; -#ifdef CONFIG_PPC64 unsigned long kuap; -#endif if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && unlikely(!(regs->msr & MSR_RI))) @@ -466,9 +459,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign if (TRAP(regs) != 0x700) CT_WARN_ON(ct_state() == CONTEXT_USER); -#ifdef CONFIG_PPC64 kuap = kuap_get_and_assert_locked(); -#endif if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) { clear_bits(_TIF_EMULATE_STACK_STORE, ¤t_thread_info()->flags); @@ -510,9 +501,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign * which would cause Read-After-Write stalls. Hence, we take the AMR * value from the check above. 
*/ -#ifdef CONFIG_PPC64 kuap_kernel_restore(regs, kuap); -#endif return ret; } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5b30df7b1b79bd..b966c8e0ceadbc 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1256,6 +1256,9 @@ struct task_struct *__switch_to(struct task_struct *prev, */ restore_sprs(old_thread, new_thread); +#ifdef CONFIG_PPC32 + kuap_assert_locked(); +#endif last = _switch(old_thread, new_thread); #ifdef CONFIG_PPC_BOOK3S_64 From 802b5560393423166e436c7914b565f3cda9e6b9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 12:43:12 +0000 Subject: [PATCH 078/302] powerpc/Makefile: Remove workaround for gcc versions below 4.9 Commit 6ec4476ac825 ("Raise gcc version requirement to 4.9") made it impossible to build with gcc 4.8 and under. Remove related workaround. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a1e552006b8c51f23edd2f6cabdd9a986c631146.1615380184.git.christophe.leroy@csgroup.eu --- arch/powerpc/Makefile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5f8544cf724a4f..32dd693b4e4205 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -181,12 +181,6 @@ CC_FLAGS_FTRACE := -pg ifdef CONFIG_MPROFILE_KERNEL CC_FLAGS_FTRACE += -mprofile-kernel endif -# Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8 -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199 -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828 -ifndef CONFIG_CC_IS_CLANG -CC_FLAGS_FTRACE += $(call cc-ifversion, -lt, 0409, -mno-sched-epilog) -endif endif CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) From f239873fcd953557ba9a9781d10ca95c0594e2b3 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Mon, 15 Mar 2021 03:34:36 +0530 Subject: [PATCH 079/302] powerpc/64e: Trivial spelling fixes throughout head_fsl_booke.S Trivial spelling fixes throughout the file. Signed-off-by: Bhaskar Chowdhury Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210314220436.3417083-1-unixbhaskar@gmail.com --- arch/powerpc/kernel/head_fsl_booke.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 3efc5baa801a9c..a1a5c3f10dc424 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -113,7 +113,7 @@ _ENTRY(_start); 1: /* - * We have the runtime (virutal) address of our base. + * We have the runtime (virtual) address of our base. * We calculate our shift of offset from a 64M page. * We could map the 64M page we belong to at PAGE_OFFSET and * get going from there. 
@@ -497,7 +497,7 @@ END_BTB_FLUSH_SECTION #endif #endif - bne 2f /* Bail if permission/valid mismach */ + bne 2f /* Bail if permission/valid mismatch */ /* Jump to common tlb load */ b finish_tlb_load @@ -592,7 +592,7 @@ END_BTB_FLUSH_SECTION #endif #endif - bne 2f /* Bail if permission mismach */ + bne 2f /* Bail if permission mismatch */ /* Jump to common TLB load point */ b finish_tlb_load From 7a7d744ffe87ae10ab98004d1a6ca1f691af58e1 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Fri, 12 Mar 2021 16:55:37 +0530 Subject: [PATCH 080/302] powerpc/mm/book3s64: Fix a typo in mmu_context.c s/detalis/details/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210312112537.4585-1-unixbhaskar@gmail.com --- arch/powerpc/mm/book3s64/mmu_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index 0c8557220ae28a..c10fc8a72fb37a 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -119,7 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm) /* This is fork. Copy hash_context details from current->mm */ memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context)); #ifdef CONFIG_PPC_SUBPAGE_PROT - /* inherit subpage prot detalis if we have one. */ + /* inherit subpage prot details if we have one. */ if (current->mm->context.hash_context->spt) { mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table), GFP_KERNEL); From e448e1e774dc0ca307c17e961daf7ede2e635c57 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 15 Mar 2021 12:00:09 +0000 Subject: [PATCH 081/302] powerpc/math: Fix missing __user qualifier for get_user() and other sparse warnings Sparse reports the following problems: arch/powerpc/math-emu/math.c:228:21: warning: Using plain integer as NULL pointer arch/powerpc/math-emu/math.c:228:31: warning: Using plain integer as NULL pointer arch/powerpc/math-emu/math.c:228:41: warning: Using plain integer as NULL pointer arch/powerpc/math-emu/math.c:228:51: warning: Using plain integer as NULL pointer arch/powerpc/math-emu/math.c:237:13: warning: incorrect type in initializer (different address spaces) arch/powerpc/math-emu/math.c:237:13: expected unsigned int [noderef] __user *_gu_addr arch/powerpc/math-emu/math.c:237:13: got unsigned int [usertype] * arch/powerpc/math-emu/math.c:226:1: warning: symbol 'do_mathemu' was not declared. Should it be static? Add missing __user qualifier when casting pointer used in get_user() Use NULL instead of 0 to initialise opX local variables. 
Add a prototype for do_mathemu() (Added in processor.h like sparc) Reported-by: kernel test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e4d1aae7604d89c98a52dfd8ce8443462e595670.1615809591.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/processor.h | 2 ++ arch/powerpc/kernel/traps.c | 1 - arch/powerpc/math-emu/math.c | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index eae16facc390eb..ad36e852157789 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -413,6 +413,8 @@ extern int fix_alignment(struct pt_regs *); #define NET_IP_ALIGN 0 #endif +int do_mathemu(struct pt_regs *regs); + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PROCESSOR_H */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 286b3a6b5c5e24..76d17492e0e52d 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1405,7 +1405,6 @@ int is_valid_bugaddr(unsigned long addr) static int emulate_math(struct pt_regs *regs) { int ret; - extern int do_mathemu(struct pt_regs *regs); ret = do_mathemu(regs); if (ret >= 0) diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c index 30b4b69c694198..327165f26ca6a6 100644 --- a/arch/powerpc/math-emu/math.c +++ b/arch/powerpc/math-emu/math.c @@ -225,7 +225,7 @@ record_exception(struct pt_regs *regs, int eflag) int do_mathemu(struct pt_regs *regs) { - void *op0 = 0, *op1 = 0, *op2 = 0, *op3 = 0; + void *op0 = NULL, *op1 = NULL, *op2 = NULL, *op3 = NULL; unsigned long pc = regs->nip; signed short sdisp; u32 insn = 0; @@ -234,7 +234,7 @@ do_mathemu(struct pt_regs *regs) int type = 0; int eflag, trap; - if (get_user(insn, (u32 *)pc)) + if (get_user(insn, (u32 __user *)pc)) return -EFAULT; switch (insn >> 26) { From 6eeca7a11379e9dd05493bbdba57515b36a2e3cf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 15 Mar 2021 11:01:26 +0000 Subject: [PATCH 082/302] powerpc/asm-offsets: GPR14 is not needed either Commit aac6a91fea93 ("powerpc/asm: Remove unused symbols in asm-offsets.c") removed GPR15 to GPR31 but kept GPR14, probably because it pops up in a couple of comments when doing a grep. However, it was never used either, so remove it as well. 
Fixes: aac6a91fea93 ("powerpc/asm: Remove unused symbols in asm-offsets.c") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9881c68fbca004f9ea18fc9473f630e11ccd6417.1615806071.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/asm-offsets.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 85ba2b0bc8d870..d2f1b94e944d98 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -320,9 +320,6 @@ int main(void) STACK_PT_REGS_OFFSET(GPR11, gpr[11]); STACK_PT_REGS_OFFSET(GPR12, gpr[12]); STACK_PT_REGS_OFFSET(GPR13, gpr[13]); -#ifndef CONFIG_PPC64 - STACK_PT_REGS_OFFSET(GPR14, gpr[14]); -#endif /* CONFIG_PPC64 */ /* * Note: these symbols include _ because they overlap with special * register names From 9214cf0f48cac3c6aa86f34e14969b5eccb72fad Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 15 Mar 2021 15:24:56 +0800 Subject: [PATCH 083/302] powerpc/xive: use true and false for bool variable fixed the following coccicheck: ./arch/powerpc/sysdev/xive/spapr.c:552:8-9: WARNING: return of 0/1 in function 'xive_spapr_match' with return type bool Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1615793096-83758-1-git-send-email-yang.lee@linux.alibaba.com --- arch/powerpc/sysdev/xive/spapr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 01ccc0786ada6c..f143b6f111ac0a 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -549,7 +549,7 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, static bool xive_spapr_match(struct device_node *node) { /* Ignore cascaded controllers for the moment */ - return 1; + return true; } #ifdef CONFIG_SMP From 7a0fdc19f2415683f403abee7bb87085d0c624ad Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Mon, 15 Mar 2021 15:35:24 +0800 Subject: [PATCH 084/302] powerpc/pci: fix warning comparing pointer to 0 Fix the following coccicheck warning: ./arch/powerpc/platforms/maple/pci.c:37:16-17: WARNING comparing pointer to 0. 
Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1615793724-97015-1-git-send-email-jiapeng.chong@linux.alibaba.com --- arch/powerpc/platforms/maple/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index a20b9576de225d..37875e478b3a71 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -34,7 +34,7 @@ static struct pci_controller *u3_agp, *u3_ht, *u4_pcie; static int __init fixup_one_level_bus_range(struct device_node *node, int higher) { - for (; node != 0;node = node->sibling) { + for (; node; node = node->sibling) { const int *bus_range; const unsigned int *class_code; int len; From 7a7685acd2129e2e5d433636120b4c5038c03e51 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 14 Mar 2021 20:33:00 +1100 Subject: [PATCH 085/302] powerpc/eeh: Fix build failure with CONFIG_PROC_FS=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The build fails with CONFIG_PROC_FS=n: arch/powerpc/kernel/eeh.c:1571:12: error: ‘proc_eeh_show’ defined but not used 1571 | static int proc_eeh_show(struct seq_file *m, void *v) Wrap proc_eeh_show() in an ifdef to avoid it. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210314093300.131998-1-mpe@ellerman.id.au --- arch/powerpc/kernel/eeh.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index cd60bc1c870113..01dbb44a0fe380 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1568,6 +1568,7 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, } EXPORT_SYMBOL_GPL(eeh_pe_inject_err); +#ifdef CONFIG_PROC_FS static int proc_eeh_show(struct seq_file *m, void *v) { if (!eeh_enabled()) { @@ -1594,6 +1595,7 @@ static int proc_eeh_show(struct seq_file *m, void *v) return 0; } +#endif /* CONFIG_PROC_FS */ #ifdef CONFIG_DEBUG_FS From c2a2a5d0270c641ce030aee247569afc1a0efbe5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 14 Mar 2021 20:33:20 +1100 Subject: [PATCH 086/302] powerpc/64s: Fold update_current_thread_[i]amr() into their only callers lkp reported warnings in some configuration due to update_current_thread_amr() being unused: arch/powerpc/mm/book3s64/pkeys.c:284:20: error: unused function 'update_current_thread_amr' static inline void update_current_thread_amr(u64 value) Which is because it's only use is inside an ifdef. We could move it inside the ifdef, but it's a single line function and only has one caller, so just fold it in. Similarly update_current_thread_iamr() is small and only called once, so fold it in also. 
Fixes: 48a8ab4eeb82 ("powerpc/book3s64/pkeys: Don't update SPRN_AMR when in kernel mode.") Reported-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210314093320.132331-1-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/pkeys.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 15dcc5ad91c558..a2d9ad13870947 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -301,19 +301,6 @@ void setup_kuap(bool disabled) } #endif -static inline void update_current_thread_amr(u64 value) -{ - current->thread.regs->amr = value; -} - -static inline void update_current_thread_iamr(u64 value) -{ - if (!likely(pkey_execute_disable_supported)) - return; - - current->thread.regs->iamr = value; -} - #ifdef CONFIG_PPC_MEM_KEYS void pkey_mm_init(struct mm_struct *mm) { @@ -328,7 +315,7 @@ static inline void init_amr(int pkey, u8 init_bits) u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); - update_current_thread_amr(old_amr | new_amr_bits); + current->thread.regs->amr = old_amr | new_amr_bits; } static inline void init_iamr(int pkey, u8 init_bits) @@ -336,7 +323,10 @@ static inline void init_iamr(int pkey, u8 init_bits) u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); - update_current_thread_iamr(old_iamr | new_iamr_bits); + if (!likely(pkey_execute_disable_supported)) + return; + + current->thread.regs->iamr = old_iamr | new_iamr_bits; } /* From 55c2f5574a013d2dbf1012a2ad93cb8d947279a7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 14 Mar 2021 20:33:33 +1100 Subject: [PATCH 087/302] powerpc: Fix section mismatch warning in smp_setup_pacas() Section mismatch in reference from the function .smp_setup_pacas() to the function .init.text:.allocate_paca() The only caller of smp_setup_pacas() is setup_arch() which is __init, so mark smp_setup_pacas() __init. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210314093333.132657-1-mpe@ellerman.id.au --- arch/powerpc/kernel/setup-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 7221f11acf0460..74a98fff2c2f91 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -828,7 +828,7 @@ static __init void print_system_info(void) } #ifdef CONFIG_SMP -static void smp_setup_pacas(void) +static void __init smp_setup_pacas(void) { int cpu; From b77878052a142737522e0e3c2a621c988a4cd7cd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 14 Mar 2021 20:33:41 +1100 Subject: [PATCH 088/302] powerpc/fsl-pci: Fix section mismatch warning Section mismatch in reference from the function .fsl_add_bridge() to the function .init.text:.setup_pci_cmd() fsl_add_bridge() is not __init, and can't be, and is the only caller of setup_pci_cmd(). Fix it by making setup_pci_cmd() non-init. 
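For background, a minimal sketch of what modpost objects to (the function names are invented for illustration): code placed in .init.text is discarded once boot finishes, so a reference to it from ordinary text risks becoming a call into freed memory:

    #include <linux/init.h>

    static void __init boot_time_setup(void)   /* lives in .init.text, freed after boot */
    {
    }

    void runtime_path(void)                    /* ordinary .text, may run at any time */
    {
            boot_time_setup();                 /* .text -> .init.text reference: modpost warns */
    }

The two possible fixes are the ones used in this pair of patches: mark the caller __init when every call happens during boot, or drop __init from the callee when the caller cannot be init code.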
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210314093341.132986-1-mpe@ellerman.id.au --- arch/powerpc/sysdev/fsl_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 040b9d01c07984..69af73765783b0 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -455,7 +455,7 @@ static void setup_pci_atmu(struct pci_controller *hose) } } -static void __init setup_pci_cmd(struct pci_controller *hose) +static void setup_pci_cmd(struct pci_controller *hose) { u16 cmd; int cap_x; From 98c26a72751ecb2ed247cdfd6cb2385f37195707 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 15 Mar 2021 14:52:51 +0000 Subject: [PATCH 089/302] powerpc/mm: Remove unneeded #ifdef CONFIG_PPC_MEM_KEYS In fault.c, #ifdef CONFIG_PPC_MEM_KEYS is not needed because all functions are always defined, and arch_vma_access_permitted() always returns true when CONFIG_PPC_MEM_KEYS is not defined so access_pkey_error() will return false so bad_access_pkey() will never be called. Include linux/pkeys.h to get a definition of vma_pkeys() for bad_access_pkey(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8038392f38d81f2ad169347efac29146f553b238.1615819955.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/fault.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 0d4e4ff77e03ac..0c0b1c2cfb49c8 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -88,7 +89,6 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address) return __bad_area(regs, address, SEGV_MAPERR); } -#ifdef CONFIG_PPC_MEM_KEYS static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, struct vm_area_struct *vma) { @@ -128,7 +128,6 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, return 0; } -#endif static noinline int bad_access(struct pt_regs *regs, unsigned long address) { @@ -235,7 +234,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return false; } -#ifdef CONFIG_PPC_MEM_KEYS static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, struct vm_area_struct *vma) { @@ -249,7 +247,6 @@ static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, return false; } -#endif static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma) { @@ -497,11 +494,9 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, return bad_area(regs, address); } -#ifdef CONFIG_PPC_MEM_KEYS if (unlikely(access_pkey_error(is_write, is_exec, (error_code & DSISR_KEYFAULT), vma))) return bad_access_pkey(regs, address, vma); -#endif /* CONFIG_PPC_MEM_KEYS */ if (unlikely(access_error(is_write, is_exec, vma))) return bad_access(regs, address); From 1479e3d3b7559133b0a107772b5841e9c2cad450 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:52:05 +1000 Subject: [PATCH 090/302] powerpc/64s: Fix hash fault to use TRAP accessor Hash faults use the trap vector to decide whether this is an instruction or data fault. This should use the TRAP accessor rather than open access regs->trap. 
This won't cause a problem at the moment because 64s only uses trap flags for system call interrupts (the norestart flag), but that could change if any other trap flags get used in future. Fixes: a4922f5442e7e ("powerpc/64s: move the hash fault handling logic to C") Suggested-by: Christophe Leroy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316105205.407767-1-npiggin@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 581b20a2feaf61..7719995323c3f2 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1545,10 +1545,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) if (user_mode(regs) || (region_id == USER_REGION_ID)) access &= ~_PAGE_PRIVILEGED; - if (regs->trap == 0x400) + if (TRAP(regs) == 0x400) access |= _PAGE_EXEC; - err = hash_page_mm(mm, ea, access, regs->trap, flags); + err = hash_page_mm(mm, ea, access, TRAP(regs), flags); if (unlikely(err < 0)) { // failed to instert a hash PTE due to an hypervisor error if (user_mode(regs)) { From 89f7d2927ae16ea470d29234447763826e40c6cf Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Wed, 17 Mar 2021 14:34:13 +0530 Subject: [PATCH 091/302] powerpc/kernel: Trivial typo fix in kgdb.c s/procesing/processing/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210317090413.120891-1-unixbhaskar@gmail.com --- arch/powerpc/kernel/kgdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 409080208a6c4a..7dd2ad3603ad26 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -376,7 +376,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) } /* - * This function does PowerPC specific procesing for interfacing to gdb. + * This function does PowerPC specific processing for interfacing to gdb. */ int kgdb_arch_handle_exception(int vector, int signo, int err_code, char *remcom_in_buffer, char *remcom_out_buffer, From 8b8adeb3007f67076141f547f0b2f62b299a383c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 18 Mar 2021 09:18:29 +0530 Subject: [PATCH 092/302] powerpc/book3s64/kuap: Move Kconfig varriables to BOOK3S_64 With below two commits: commit c91435d95c49 ("powerpc/book3s64/hash/kuep: Enable KUEP on hash") commit b2ff33a10c8b ("powerpc/book3s64/hash/kuap: Enable kuap on hash") the kernel now supports kuap/kuep with hash translation. Hence select the Kconfig even when radix is disabled. 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210318034829.72255-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/platforms/Kconfig.cputype | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 3ce907523b1e16..9240743caefc27 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -101,6 +101,8 @@ config PPC_BOOK3S_64 select ARCH_SUPPORTS_NUMA_BALANCING select IRQ_WORK select PPC_MM_SLICES + select PPC_HAVE_KUEP + select PPC_HAVE_KUAP config PPC_BOOK3E_64 bool "Embedded processors" @@ -363,8 +365,6 @@ config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 select ARCH_HAS_GIGANTIC_PAGE - select PPC_HAVE_KUEP - select PPC_HAVE_KUAP default y help Enable support for the Power ISA 3.0 Radix style MMU. Currently this From accdd093f260bc8c8a8f580ee48e49ad5c5f91b2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 16 Mar 2021 07:57:13 +0000 Subject: [PATCH 093/302] powerpc: Activate HAVE_RELIABLE_STACKTRACE for all CONFIG_HAVE_RELIABLE_STACKTRACE is applicable to all, no reason to limit it to book3s/64le Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/955248c6423cb068c5965923121ba31d4dd2fdde.1615881400.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 2 +- arch/powerpc/kernel/stacktrace.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d46db0bfb99878..e446d68bebe4ae 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -235,7 +235,7 @@ config PPC select MMU_GATHER_RCU_TABLE_FREE select MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN + select HAVE_RELIABLE_STACKTRACE select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index b6440657ef92d0..a2a05055110917 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -88,7 +88,6 @@ save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) } EXPORT_SYMBOL_GPL(save_stack_trace_regs); -#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE /* * This function returns an error if it detects any unreliable features of the * stack. Otherwise it guarantees that the stack trace is reliable. @@ -220,7 +219,6 @@ int save_stack_trace_tsk_reliable(struct task_struct *tsk, return ret; } -#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */ #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) static void handle_backtrace_ipi(struct pt_regs *regs) From 826a307b0a11e605b4be0b2727550b510c4a88cd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 16 Mar 2021 07:57:14 +0000 Subject: [PATCH 094/302] powerpc: Rename 'tsk' parameter into 'task' To better match generic code, rename 'tsk' to 'task' in some stacktrace functions in preparation of following patch which converts powerpc to generic ARCH_STACKWALK. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/117f0200e11961af6c0fdf85c98373e5dcf96a47.1615881400.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/stacktrace.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index a2a05055110917..5b93650bc16c8c 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -27,13 +27,13 @@ * Save stack-backtrace addresses into a stack_trace buffer. */ static void save_context_stack(struct stack_trace *trace, unsigned long sp, - struct task_struct *tsk, int savesched) + struct task_struct *task, int savesched) { for (;;) { unsigned long *stack = (unsigned long *) sp; unsigned long newsp, ip; - if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, task, STACK_FRAME_OVERHEAD)) return; newsp = stack[0]; @@ -94,18 +94,18 @@ EXPORT_SYMBOL_GPL(save_stack_trace_regs); * * If the task is not 'current', the caller *must* ensure the task is inactive. */ -static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, +static int __save_stack_trace_tsk_reliable(struct task_struct *task, struct stack_trace *trace) { unsigned long sp; unsigned long newsp; - unsigned long stack_page = (unsigned long)task_stack_page(tsk); + unsigned long stack_page = (unsigned long)task_stack_page(task); unsigned long stack_end; int graph_idx = 0; bool firstframe; stack_end = stack_page + THREAD_SIZE; - if (!is_idle_task(tsk)) { + if (!is_idle_task(task)) { /* * For user tasks, this is the SP value loaded on * kernel entry, see "PACAKSAVE(r13)" in _switch() and @@ -129,10 +129,10 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, stack_end -= STACK_FRAME_OVERHEAD; } - if (tsk == current) + if (task == current) sp = current_stack_frame(); else - sp = tsk->thread.ksp; + sp = task->thread.ksp; if (sp < stack_page + sizeof(struct thread_struct) || sp > stack_end - STACK_FRAME_MIN_SIZE) { @@ -181,7 +181,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, * FIXME: IMHO these tests do not belong in * arch-dependent code, they are generic. 
*/ - ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack); + ip = ftrace_graph_ret_addr(task, &graph_idx, ip, stack); #ifdef CONFIG_KPROBES /* * Mark stacktraces with kretprobed functions on them From a1cdef04f22dd5ad9e1ccf5d05a549c697b7f52d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 16 Mar 2021 07:57:15 +0000 Subject: [PATCH 095/302] powerpc: Convert stacktrace to generic ARCH_STACKWALK This patch converts powerpc stacktrace to the generic ARCH_STACKWALK implemented by commit 214d8ca6ee85 ("stacktrace: Provide common infrastructure") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/73b36bbb101299760b95ecd2cd3a46554bea8bf9.1615881400.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/stacktrace.c | 91 ++++++-------------------------- 2 files changed, 17 insertions(+), 75 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e446d68bebe4ae..08e594a4ffb877 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -145,6 +145,7 @@ config PPC select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64 select ARCH_USE_BUILTIN_BSWAP diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 5b93650bc16c8c..80f92f5b539390 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -23,12 +23,18 @@ #include -/* - * Save stack-backtrace addresses into a stack_trace buffer. - */ -static void save_context_stack(struct stack_trace *trace, unsigned long sp, - struct task_struct *task, int savesched) +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { + unsigned long sp; + + if (regs) + sp = regs->gpr[1]; + else if (task == current) + sp = current_stack_frame(); + else + sp = task->thread.ksp; + for (;;) { unsigned long *stack = (unsigned long *) sp; unsigned long newsp, ip; @@ -39,63 +45,21 @@ static void save_context_stack(struct stack_trace *trace, unsigned long sp, newsp = stack[0]; ip = stack[STACK_FRAME_LR_SAVE]; - if (savesched || !in_sched_functions(ip)) { - if (!trace->skip) - trace->entries[trace->nr_entries++] = ip; - else - trace->skip--; - } - - if (trace->nr_entries >= trace->max_entries) + if (!consume_entry(cookie, ip)) return; sp = newsp; } } -void save_stack_trace(struct stack_trace *trace) -{ - unsigned long sp; - - sp = current_stack_frame(); - - save_context_stack(trace, sp, current, 1); -} -EXPORT_SYMBOL_GPL(save_stack_trace); - -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) -{ - unsigned long sp; - - if (!try_get_task_stack(tsk)) - return; - - if (tsk == current) - sp = current_stack_frame(); - else - sp = tsk->thread.ksp; - - save_context_stack(trace, sp, tsk, 0); - - put_task_stack(tsk); -} -EXPORT_SYMBOL_GPL(save_stack_trace_tsk); - -void -save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) -{ - save_context_stack(trace, regs->gpr[1], current, 0); -} -EXPORT_SYMBOL_GPL(save_stack_trace_regs); - /* * This function returns an error if it detects any unreliable features of the * stack. Otherwise it guarantees that the stack trace is reliable. * * If the task is not 'current', the caller *must* ensure the task is inactive. 
*/ -static int __save_stack_trace_tsk_reliable(struct task_struct *task, - struct stack_trace *trace) +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) { unsigned long sp; unsigned long newsp; @@ -191,35 +155,12 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *task, return -EINVAL; #endif - if (trace->nr_entries >= trace->max_entries) - return -E2BIG; - if (!trace->skip) - trace->entries[trace->nr_entries++] = ip; - else - trace->skip--; + if (!consume_entry(cookie, ip)) + return -EINVAL; } return 0; } -int save_stack_trace_tsk_reliable(struct task_struct *tsk, - struct stack_trace *trace) -{ - int ret; - - /* - * If the task doesn't have a stack (e.g., a zombie), the stack is - * "reliably" empty. - */ - if (!try_get_task_stack(tsk)) - return 0; - - ret = __save_stack_trace_tsk_reliable(tsk, trace); - - put_task_stack(tsk); - - return ret; -} - #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) static void handle_backtrace_ipi(struct pt_regs *regs) { From a2308836880bf1501ff9373c611dc2970247d42b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 16 Mar 2021 07:57:16 +0000 Subject: [PATCH 096/302] powerpc: Fix arch_stack_walk() to have running function as first entry It seems like other architectures, namely x86 and arm64 and riscv at least, include the running function as top entry when saving stack trace with save_stack_trace_regs(). Functionnalities like KFENCE expect it. Do the same on powerpc, it allows KFENCE and other users to properly identify the faulting function as depicted below. Before the patch KFENCE was identifying finish_task_switch.isra as the faulting function. [ 14.937370] ================================================================== [ 14.948692] BUG: KFENCE: invalid read in test_invalid_access+0x54/0x108 [ 14.948692] [ 14.956814] Invalid read at 0xdf98800a: [ 14.960664] test_invalid_access+0x54/0x108 [ 14.964876] finish_task_switch.isra.0+0x54/0x23c [ 14.969606] kunit_try_run_case+0x5c/0xd0 [ 14.973658] kunit_generic_run_threadfn_adapter+0x24/0x30 [ 14.979079] kthread+0x15c/0x174 [ 14.982342] ret_from_kernel_thread+0x14/0x1c [ 14.986731] [ 14.988236] CPU: 0 PID: 111 Comm: kunit_try_catch Tainted: G B 5.12.0-rc1-01537-g95f6e2088d7e-dirty #4682 [ 14.999795] NIP: c016ec2c LR: c02f517c CTR: c016ebd8 [ 15.004851] REGS: e2449d90 TRAP: 0301 Tainted: G B (5.12.0-rc1-01537-g95f6e2088d7e-dirty) [ 15.015274] MSR: 00009032 CR: 22000004 XER: 00000000 [ 15.022043] DAR: df98800a DSISR: 20000000 [ 15.022043] GPR00: c02f517c e2449e50 c1142080 e100dd24 c084b13c 00000008 c084b32b c016ebd8 [ 15.022043] GPR08: c0850000 df988000 c0d10000 e2449eb0 22000288 [ 15.040581] NIP [c016ec2c] test_invalid_access+0x54/0x108 [ 15.046010] LR [c02f517c] kunit_try_run_case+0x5c/0xd0 [ 15.051181] Call Trace: [ 15.053637] [e2449e50] [c005a68c] finish_task_switch.isra.0+0x54/0x23c (unreliable) [ 15.061338] [e2449eb0] [c02f517c] kunit_try_run_case+0x5c/0xd0 [ 15.067215] [e2449ed0] [c02f648c] kunit_generic_run_threadfn_adapter+0x24/0x30 [ 15.074472] [e2449ef0] [c004e7b0] kthread+0x15c/0x174 [ 15.079571] [e2449f30] [c001317c] ret_from_kernel_thread+0x14/0x1c [ 15.085798] Instruction dump: [ 15.088784] 8129d608 38e7ebd8 81020280 911f004c 39000000 995f0024 907f0028 90ff001c [ 15.096613] 3949000a 915f0020 3d40c0d1 3d00c085 <8929000a> 3908adb0 812a4b98 3d40c02f [ 15.104612] ================================================================== Fixes: 35de3b1aa168 ("powerpc: Implement save_stack_trace_regs() to enable 
kprobe stack tracing") Signed-off-by: Christophe Leroy Acked-by: Marco Elver Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/21324f9e2f21d1640c8397b4d1d857a9355a2283.1615881400.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/stacktrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 80f92f5b539390..1deb1bf331ddbd 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -28,6 +28,9 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, { unsigned long sp; + if (regs && !consume_entry(cookie, regs->nip)) + return; + if (regs) sp = regs->gpr[1]; else if (task == current) From bbbe563f8490958861777d98871e16960163ea1b Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Fri, 19 Mar 2021 07:44:41 +0800 Subject: [PATCH 097/302] powerpc/iommu/debug: fix ifnullfree.cocci warnings arch/powerpc/kernel/iommu.c:76:2-16: WARNING: NULL check before some freeing functions is not needed. NULL check before some freeing functions is not needed. Based on checkpatch warning "kfree(NULL) is safe this check is probably not required" and kfreeaddr.cocci by Julia Lawall. Generated by: scripts/coccinelle/free/ifnullfree.cocci Fixes: 691602aab9c3 ("powerpc/iommu/debug: Add debugfs entries for IOMMU tables") Reported-by: kernel test robot Signed-off-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210318234441.GA63469@f8e20a472e81 --- arch/powerpc/kernel/iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index c00214a4355c80..2168714144348c 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -72,8 +72,7 @@ static void iommu_debugfs_del(struct iommu_table *tbl) sprintf(name, "%08lx", tbl->it_index); liobn_entry = debugfs_lookup(name, iommu_debugfs_dir); - if (liobn_entry) - debugfs_remove(liobn_entry); + debugfs_remove(liobn_entry); } #else static void iommu_debugfs_add(struct iommu_table *tbl){} From a329ddd472fa2af0c19a73b8658898ae7fd658ad Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 18 Mar 2021 17:25:07 +0000 Subject: [PATCH 098/302] powerpc/embedded6xx: Remove CONFIG_MV64X60 Commit 92c8c16f3457 ("powerpc/embedded6xx: Remove C2K board support") moved the last selector of CONFIG_MV64X60. As it is not a user selectable config, it can be removed. 
Signed-off-by: Christophe Leroy Acked-by: Wolfram Sang # for I2C Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/19e57d16692dcd1ca67ba880d7273a57fab416aa.1616085654.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/embedded6xx/Kconfig | 5 ----- drivers/i2c/busses/Kconfig | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig index c1920961f41044..4c6d703a4284b9 100644 --- a/arch/powerpc/platforms/embedded6xx/Kconfig +++ b/arch/powerpc/platforms/embedded6xx/Kconfig @@ -71,11 +71,6 @@ config MPC10X_BRIDGE bool select PPC_INDIRECT_PCI -config MV64X60 - bool - select PPC_INDIRECT_PCI - select CHECK_CACHE_COHERENCY - config GAMECUBE_COMMON bool diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 05ebf7546e3f61..20edcda1c6f414 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -776,7 +776,7 @@ config I2C_MT7621 config I2C_MV64XXX tristate "Marvell mv64xxx I2C Controller" - depends on MV64X60 || PLAT_ORION || ARCH_SUNXI || ARCH_MVEBU || COMPILE_TEST + depends on PLAT_ORION || ARCH_SUNXI || ARCH_MVEBU || COMPILE_TEST help If you say yes to this option, support will be included for the built-in I2C interface on the Marvell 64xxx line of host bridges. From d2313da4ff56bd631a3afe7a17992ed5bd0e04a6 Mon Sep 17 00:00:00 2001 From: He Ying Date: Tue, 16 Mar 2021 00:11:48 -0400 Subject: [PATCH 099/302] powerpc/setup_64: Fix sparse warnings Sparse warns: warning: symbol 'rfi_flush' was not declared. warning: symbol 'entry_flush' was not declared. warning: symbol 'uaccess_flush' was not declared. Define 'entry_flush' and 'uaccess_flush' as static because they are not referenced outside the file. Include asm/security_features.h in which 'rfi_flush' is declared. Reported-by: Hulk Robot Signed-off-by: He Ying Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316041148.29694-1-heying24@huawei.com --- arch/powerpc/kernel/setup_64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 560ed8b975e77f..04a31586f76071 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -949,8 +950,8 @@ static bool no_rfi_flush; static bool no_entry_flush; static bool no_uaccess_flush; bool rfi_flush; -bool entry_flush; -bool uaccess_flush; +static bool entry_flush; +static bool uaccess_flush; DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); EXPORT_SYMBOL(uaccess_flush_key); From 48cf12d88969bd4238b8769767eb476970319d93 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Mar 2021 06:34:43 +0000 Subject: [PATCH 100/302] powerpc/irq: Inline call_do_irq() and call_do_softirq() call_do_irq() and call_do_softirq() are simple enough to be worth inlining. Inlining them avoids an mflr/mtlr pair plus a save/reload on stack. This is inspired from S390 arch. Several other arches do more or less the same. The way sparc arch does seems odd thought. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210320122227.345427-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/irq.h | 2 -- arch/powerpc/kernel/irq.c | 41 ++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/misc_32.S | 25 --------------------- arch/powerpc/kernel/misc_64.S | 22 ------------------ 4 files changed, 41 insertions(+), 49 deletions(-) diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index f3f264e441a79e..b2bd588304300f 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -53,8 +53,6 @@ extern void *mcheckirq_ctx[NR_CPUS]; extern void *hardirq_ctx[NR_CPUS]; extern void *softirq_ctx[NR_CPUS]; -void call_do_softirq(void *sp); -void call_do_irq(struct pt_regs *regs, void *sp); extern void do_IRQ(struct pt_regs *regs); extern void __init init_IRQ(void); extern void __do_irq(struct pt_regs *regs); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5b72abbff96c5b..260effc0a435d8 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -667,6 +667,47 @@ static inline void check_stack_overflow(void) } } +static __always_inline void call_do_softirq(const void *sp) +{ + /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */ + asm volatile ( + PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r1, %[sp] ;" + "bl %[callee] ;" + PPC_LL " %%r1, 0(%%r1) ;" + : // Outputs + : // Inputs + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [callee] "i" (__do_softirq) + : // Clobbers + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", + "cr7", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" + ); +} + +static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) +{ + register unsigned long r3 asm("r3") = (unsigned long)regs; + + /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */ + asm volatile ( + PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r1, %[sp] ;" + "bl %[callee] ;" + PPC_LL " %%r1, 0(%%r1) ;" + : // Outputs + "+r" (r3) + : // Inputs + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [callee] "i" (__do_irq) + : // Clobbers + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", + "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" + ); +} + void __do_irq(struct pt_regs *regs) { unsigned int irq; diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index acc410043b9656..6a076bef293215 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -27,31 +27,6 @@ .text -_GLOBAL(call_do_softirq) - mflr r0 - stw r0,4(r1) - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) - mr r1,r3 - bl __do_softirq - lwz r1,0(r1) - lwz r0,4(r1) - mtlr r0 - blr - -/* - * void call_do_irq(struct pt_regs *regs, void *sp); - */ -_GLOBAL(call_do_irq) - mflr r0 - stw r0,4(r1) - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) - mr r1,r4 - bl __do_irq - lwz r1,0(r1) - lwz r0,4(r1) - mtlr r0 - blr - /* * This returns the high 64 bits of the product of two 64-bit numbers. 
*/ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 070465825c2102..4b761a18a74d18 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -27,28 +27,6 @@ .text -_GLOBAL(call_do_softirq) - mflr r0 - std r0,16(r1) - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) - mr r1,r3 - bl __do_softirq - ld r1,0(r1) - ld r0,16(r1) - mtlr r0 - blr - -_GLOBAL(call_do_irq) - mflr r0 - std r0,16(r1) - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) - mr r1,r4 - bl __do_irq - ld r1,0(r1) - ld r0,16(r1) - mtlr r0 - blr - _GLOBAL(__bswapdi2) EXPORT_SYMBOL(__bswapdi2) srdi r8,r3,32 From e23ecdf9fd87c547a3ac55bcebaf7df28df2fab0 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Mon, 22 Mar 2021 08:03:07 +0530 Subject: [PATCH 101/302] cxl: Fix couple of spellings s/filesytem/filesystem/ s/symantics/semantics/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210322023307.168754-1-unixbhaskar@gmail.com --- drivers/misc/cxl/context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index fb2eff69e449d2..e627b405662395 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -52,7 +52,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) * can always access it when dereferenced from IDR. For the same * reason, the segment table is only destroyed after the context is * removed from the IDR. Access to this in the IOCTL is protected by - * Linux filesytem symantics (can't IOCTL until open is complete). + * Linux filesystem semantics (can't IOCTL until open is complete). */ i = cxl_alloc_sst(ctx); if (i) From dfc4ae3372182a168146745def03d877f31fcf2f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 25 Mar 2021 13:08:20 -0700 Subject: [PATCH 102/302] selftests/powerpc: unmark non-kernel-doc comments Drop the 'beginning of kernel-doc' notation markers (/**) in places that are not in kernel-doc format. Signed-off-by: Randy Dunlap Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210325200820.16594-1-rdunlap@infradead.org --- tools/testing/selftests/powerpc/tm/tm-trap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c index c75960af8018fb..11521077f91519 100644 --- a/tools/testing/selftests/powerpc/tm/tm-trap.c +++ b/tools/testing/selftests/powerpc/tm/tm-trap.c @@ -66,7 +66,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc) /* Get thread endianness: extract bit LE from MSR */ thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR]; - /*** + /* * Little-Endian Machine */ @@ -126,7 +126,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc) } } - /*** + /* * Big-Endian Machine */ From d19b3ad02c2d1a9a697b7059e32fa2d97a420b15 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 23 Mar 2021 17:50:56 -0300 Subject: [PATCH 103/302] powerpc/pseries/hotplug-cpu: Show 'last online CPU' error in dlpar_cpu_offline() One of the reasons that dlpar_cpu_offline can fail is when attempting to offline the last online CPU of the kernel. This can be observed in a pseries QEMU guest that has hotplugged CPUs. If the user offlines all other CPUs of the guest, and a hotplugged CPU is now the last online CPU, trying to reclaim it will fail. 
The current error message in this situation returns rc with -EBUSY and a generic explanation, e.g.: pseries-hotplug-cpu: Failed to offline CPU PowerPC,POWER9, rc: -16 EBUSY can be caused by other conditions, such as cpu_hotplug_disable being true. Throwing a more specific error message for this case, instead of just "Failed to offline CPU", makes it clearer that the error is in fact a known error situation instead of other generic/unknown cause. This patch adds a 'last online' check in dlpar_cpu_offline() to catch the 'last online CPU' offline error, eturning a more informative error message: pseries-hotplug-cpu: Unable to remove last online CPU PowerPC,POWER9 Signed-off-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210323205056.52768-2-danielhb413@gmail.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 12cbffd3c2e32c..ec478f8a98ff26 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -271,6 +271,19 @@ static int dlpar_offline_cpu(struct device_node *dn) if (!cpu_online(cpu)) break; + /* + * device_offline() will return -EBUSY (via cpu_down()) if there + * is only one CPU left. Check it here to fail earlier and with a + * more informative error message, while also retaining the + * cpu_add_remove_lock to be sure that no CPUs are being + * online/offlined during this check. + */ + if (num_online_cpus() == 1) { + pr_warn("Unable to remove last online CPU %pOFn\n", dn); + rc = -EBUSY; + goto out_unlock; + } + cpu_maps_update_done(); rc = device_offline(get_cpu_device(cpu)); if (rc) @@ -283,6 +296,7 @@ static int dlpar_offline_cpu(struct device_node *dn) thread); } } +out_unlock: cpu_maps_update_done(); out: From 4fe529449d85e78972fa327999961ecc83a0b6db Mon Sep 17 00:00:00 2001 From: Chen Huang Date: Sat, 27 Mar 2021 09:49:00 +0000 Subject: [PATCH 104/302] powerpc: Fix HAVE_HARDLOCKUP_DETECTOR_ARCH build configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling the powerpc with the SMP disabled, it shows the issue: arch/powerpc/kernel/watchdog.c: In function ‘watchdog_smp_panic’: arch/powerpc/kernel/watchdog.c:177:4: error: implicit declaration of function ‘smp_send_nmi_ipi’; did you mean ‘smp_send_stop’? [-Werror=implicit-function-declaration] 177 | smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000); | ^~~~~~~~~~~~~~~~ | smp_send_stop cc1: all warnings being treated as errors make[2]: *** [scripts/Makefile.build:273: arch/powerpc/kernel/watchdog.o] Error 1 make[1]: *** [scripts/Makefile.build:534: arch/powerpc/kernel] Error 2 make: *** [Makefile:1980: arch/powerpc] Error 2 make: *** Waiting for unfinished jobs.... We found that powerpc used ipi to implement hardlockup watchdog, so the HAVE_HARDLOCKUP_DETECTOR_ARCH should depend on the SMP. 
Fixes: 2104180a5369 ("powerpc/64s: implement arch-specific hardlockup watchdog") Reported-by: Hulk Robot Signed-off-by: Chen Huang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210327094900.938555-1-chenhuang5@huawei.com --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 08e594a4ffb877..c1344c05226c0f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -226,7 +226,7 @@ config PPC select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) - select HAVE_HARDLOCKUP_DETECTOR_ARCH if (PPC64 && PPC_BOOK3S) + select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC64 && PPC_BOOK3S && SMP select HAVE_OPTPROBES if PPC64 select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 From 69931cc387cca289e0415c79ce5389119670066d Mon Sep 17 00:00:00 2001 From: dingsenjie Date: Fri, 26 Mar 2021 19:53:56 +0800 Subject: [PATCH 105/302] powerpc/powernv: Remove unneeded variable: "rc" Remove unneeded variable: "rc". Signed-off-by: dingsenjie Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210326115356.12444-1-dingsenjie@163.com --- arch/powerpc/platforms/powernv/opal-prd.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index deddaebf8c14f8..a191f4c60ce71c 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -105,7 +105,6 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) { size_t addr, size; pgprot_t page_prot; - int rc; pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n", vma->vm_start, vma->vm_end, vma->vm_pgoff, @@ -121,10 +120,8 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot); - rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, page_prot); - - return rc; } static bool opal_msg_queue_empty(void) From 11d92156f7a862091009d7655d19c1e7de37fc7a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Mar 2021 12:09:38 +1100 Subject: [PATCH 106/302] powerpc/pseries: Only register vio drivers if vio bus exists The vio bus is a fake bus, which we use on pseries LPARs (guests) to discover devices provided by the hypervisor. There's no need or sense in creating the vio bus on bare metal systems. Which is why commit 4336b9337824 ("powerpc/pseries: Make vio and ibmebus initcalls pseries specific") made the initialisation of the vio bus only happen in LPARs. However as a result of that commit we now see errors at boot on bare metal systems: Driver 'hvc_console' was unable to register with bus_type 'vio' because the bus was not initialized. Driver 'tpm_ibmvtpm' was unable to register with bus_type 'vio' because the bus was not initialized. This happens because those drivers are built-in, and are calling vio_register_driver(). It in turn calls driver_register() with a reference to vio_bus_type, but we haven't registered vio_bus_type with the driver core. Fix it by also guarding vio_register_driver() with a check to see if we are on pseries. 
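A condensed sketch of the call-site pattern this protects (the driver below is invented for illustration): a built-in driver's initcall now just gets -ENODEV on non-pseries machines instead of hitting the driver-core error above:

    #include <linux/init.h>
    #include <linux/module.h>
    #include <asm/vio.h>

    static struct vio_driver example_driver = {
            .name = "example",
    };

    static int __init example_init(void)
    {
            /* returns -ENODEV when not running as a pseries LPAR */
            return vio_register_driver(&example_driver);
    }
    device_initcall(example_init);

For a built-in driver an initcall returning -ENODEV is harmless; the driver simply never binds.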
Fixes: 4336b9337824 ("powerpc/pseries: Make vio and ibmebus initcalls pseries specific") Reported-by: Paul Menzel Signed-off-by: Michael Ellerman Tested-by: Paul Menzel Reviewed-by: Tyrel Datwyler Link: https://lore.kernel.org/r/20210316010938.525657-1-mpe@ellerman.id.au --- arch/powerpc/platforms/pseries/vio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 9cb4fc839fd5d8..429053d0402ad1 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1285,6 +1285,10 @@ static int vio_bus_remove(struct device *dev) int __vio_register_driver(struct vio_driver *viodrv, struct module *owner, const char *mod_name) { + // vio_bus_type is only initialised for pseries + if (!machine_is(pseries)) + return -ENODEV; + pr_debug("%s: driver %s registering\n", __func__, viodrv->name); /* fill in 'struct driver' fields */ From 937c49d10b4dc8e81ed1a24ffab8d70bba138af1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 18 Mar 2021 09:18:55 +0530 Subject: [PATCH 107/302] powerpc/mm: Revert "powerpc/mm: Remove DEBUG_VM_PGTABLE support on powerpc" This reverts commit 675bceb097e6 ("powerpc/mm: Remove DEBUG_VM_PGTABLE support on powerpc") All the related issues are fixed as of commit: f14312e1ed1e ("mm/debug_vm_pgtable: avoid doing memory allocation with pgtable_t mapped.") Hence re-enable it. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210318034855.74513-1-aneesh.kumar@linux.ibm.com --- Documentation/features/debug/debug-vm-pgtable/arch-support.txt | 2 +- arch/powerpc/Kconfig | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt index 7aff505af706d7..fa83403b4aec09 100644 --- a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt +++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt @@ -21,7 +21,7 @@ | nios2: | TODO | | openrisc: | TODO | | parisc: | TODO | - | powerpc: | TODO | + | powerpc: | ok | | riscv: | ok | | s390: | ok | | sh: | TODO | diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c1344c05226c0f..6c400f877d8942 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -119,6 +119,7 @@ config PPC # select ARCH_32BIT_OFF_T if PPC32 select ARCH_HAS_DEBUG_VIRTUAL + select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE From 8cdf748d557f15ae6f9e0d4108cc3ea6e1ee4419 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:40 +0000 Subject: [PATCH 108/302] powerpc/uaccess: Remove __get_user_allowed() and unsafe_op_wrap() Those two macros have only one user which is unsafe_get_user(). Put everything in one place and remove them. 
Signed-off-by: Christophe Leroy Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/439179c5e54c18f2cb8bdf1eea13ea0ef6b98375.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index c3d3d178fa0e14..57d7b1bfb7a27a 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -53,9 +53,6 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) #define __put_user(x, ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) -#define __get_user_allowed(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false) - #define __get_user_inatomic(x, ptr) \ __get_user_nosleep((x), (ptr), sizeof(*(ptr))) #define __put_user_inatomic(x, ptr) \ @@ -481,8 +478,11 @@ user_write_access_begin(const void __user *ptr, size_t len) #define user_write_access_begin user_write_access_begin #define user_write_access_end prevent_current_write_to_user -#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) -#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e) +#define unsafe_get_user(x, p, e) do { \ + if (unlikely(__get_user_nocheck((x), (p), sizeof(*(p)), false)))\ + goto e; \ +} while (0) + #define unsafe_put_user(x, p, e) \ __unsafe_put_user_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e) From 9bd68dc5d7463cb959bff9ac4b6c7e578171de35 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:41 +0000 Subject: [PATCH 109/302] powerpc/uaccess: Define ___get_user_instr() for ppc32 Define simple ___get_user_instr() for ppc32 instead of defining ppc32 versions of the three get_user_instr() helpers. 
Signed-off-by: Christophe Leroy Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e02f83ec74f26d76df2874f0ce4d5cc69c3469ae.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 57d7b1bfb7a27a..250f535c08743c 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -81,6 +81,10 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) } \ __gui_ret; \ }) +#else /* !CONFIG_PPC64 */ +#define ___get_user_instr(gu_op, dest, ptr) \ + gu_op((dest).val, (u32 __user *)(ptr)) +#endif /* CONFIG_PPC64 */ #define get_user_instr(x, ptr) \ ___get_user_instr(get_user, x, ptr) @@ -91,18 +95,6 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) #define __get_user_instr_inatomic(x, ptr) \ ___get_user_instr(__get_user_inatomic, x, ptr) -#else /* !CONFIG_PPC64 */ -#define get_user_instr(x, ptr) \ - get_user((x).val, (u32 __user *)(ptr)) - -#define __get_user_instr(x, ptr) \ - __get_user_nocheck((x).val, (u32 __user *)(ptr), sizeof(u32), true) - -#define __get_user_instr_inatomic(x, ptr) \ - __get_user_nosleep((x).val, (u32 __user *)(ptr), sizeof(u32)) - -#endif /* CONFIG_PPC64 */ - extern long __put_user_bad(void); #define __put_user_size(x, ptr, size, retval) \ From 3fa3db32956d74c0784171ae0334685502bb169a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 13:25:11 +0000 Subject: [PATCH 110/302] powerpc/align: Convert emulate_spe() to user_access_begin This patch converts emulate_spe() to using user_access_begin logic. Since commit 662bbcb2747c ("mm, sched: Allow uaccess in atomic with pagefault_disable()"), might_fault() doesn't fire when called from sections where pagefaults are disabled, which must be the case when using _inatomic variants of __get_user and __put_user. So the might_fault() in user_access_begin() is not a problem. There was a verification of user_mode() together with the access_ok(), but there is a second verification of user_mode() just after, that leads to immediate return. The access_ok() is now part of the user_access_begin which is called after that other user_mode() verification, so no need to check user_mode() again. 
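In outline, the conversion follows the usual user_access_begin pattern, roughly as in the sketch below (simplified, with made-up names; the real code in align.c handles several access sizes and both the load and the store directions):

    #include <linux/types.h>
    #include <linux/uaccess.h>

    static int read_pair(const u8 __user *p, u8 *dst)
    {
            if (!user_read_access_begin(p, 2))      /* replaces the old access_ok() check */
                    return -EFAULT;
            unsafe_get_user(dst[0], p, fault);      /* no per-access enable/disable cost */
            unsafe_get_user(dst[1], p + 1, fault);
            user_read_access_end();
            return 0;
    fault:
            user_read_access_end();
            return -EFAULT;
    }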
Signed-off-by: Christophe Leroy Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c95a648fdf75992c9d88f3c73cc23e7537fcf2ad.1615555354.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/align.c | 61 ++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index c7797eb958c73e..f362c99213be26 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -107,7 +107,6 @@ static struct aligninfo spe_aligninfo[32] = { static int emulate_spe(struct pt_regs *regs, unsigned int reg, struct ppc_inst ppc_instr) { - int ret; union { u64 ll; u32 w[2]; @@ -127,11 +126,6 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, nb = spe_aligninfo[instr].len; flags = spe_aligninfo[instr].flags; - /* Verify the address of the operand */ - if (unlikely(user_mode(regs) && - !access_ok(addr, nb))) - return -EFAULT; - /* userland only */ if (unlikely(!user_mode(regs))) return 0; @@ -169,26 +163,27 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, } } else { temp.ll = data.ll = 0; - ret = 0; p = addr; + if (!user_read_access_begin(addr, nb)) + return -EFAULT; + switch (nb) { case 8: - ret |= __get_user_inatomic(temp.v[0], p++); - ret |= __get_user_inatomic(temp.v[1], p++); - ret |= __get_user_inatomic(temp.v[2], p++); - ret |= __get_user_inatomic(temp.v[3], p++); + unsafe_get_user(temp.v[0], p++, Efault_read); + unsafe_get_user(temp.v[1], p++, Efault_read); + unsafe_get_user(temp.v[2], p++, Efault_read); + unsafe_get_user(temp.v[3], p++, Efault_read); fallthrough; case 4: - ret |= __get_user_inatomic(temp.v[4], p++); - ret |= __get_user_inatomic(temp.v[5], p++); + unsafe_get_user(temp.v[4], p++, Efault_read); + unsafe_get_user(temp.v[5], p++, Efault_read); fallthrough; case 2: - ret |= __get_user_inatomic(temp.v[6], p++); - ret |= __get_user_inatomic(temp.v[7], p++); - if (unlikely(ret)) - return -EFAULT; + unsafe_get_user(temp.v[6], p++, Efault_read); + unsafe_get_user(temp.v[7], p++, Efault_read); } + user_read_access_end(); switch (instr) { case EVLDD: @@ -255,31 +250,41 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, /* Store result to memory or update registers */ if (flags & ST) { - ret = 0; p = addr; + + if (!user_write_access_begin(addr, nb)) + return -EFAULT; + switch (nb) { case 8: - ret |= __put_user_inatomic(data.v[0], p++); - ret |= __put_user_inatomic(data.v[1], p++); - ret |= __put_user_inatomic(data.v[2], p++); - ret |= __put_user_inatomic(data.v[3], p++); + unsafe_put_user(data.v[0], p++, Efault_write); + unsafe_put_user(data.v[1], p++, Efault_write); + unsafe_put_user(data.v[2], p++, Efault_write); + unsafe_put_user(data.v[3], p++, Efault_write); fallthrough; case 4: - ret |= __put_user_inatomic(data.v[4], p++); - ret |= __put_user_inatomic(data.v[5], p++); + unsafe_put_user(data.v[4], p++, Efault_write); + unsafe_put_user(data.v[5], p++, Efault_write); fallthrough; case 2: - ret |= __put_user_inatomic(data.v[6], p++); - ret |= __put_user_inatomic(data.v[7], p++); + unsafe_put_user(data.v[6], p++, Efault_write); + unsafe_put_user(data.v[7], p++, Efault_write); } - if (unlikely(ret)) - return -EFAULT; + user_write_access_end(); } else { *evr = data.w[0]; regs->gpr[reg] = data.w[1]; } return 1; + +Efault_read: + user_read_access_end(); + return -EFAULT; + +Efault_write: + user_write_access_end(); + return -EFAULT; } #endif /* CONFIG_SPE */ From bad956b8fe1a8b3b634d596ed2023ec30726cdf1 Mon Sep 
17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:43 +0000 Subject: [PATCH 111/302] powerpc/uaccess: Remove __get/put_user_inatomic() Powerpc is the only architecture having _inatomic variants of __get_user() and __put_user() accessors. They were introduced by commit e68c825bb016 ("[POWERPC] Add inatomic versions of __get_user and __put_user"). Those variants expand to the _nosleep macros instead of expanding to the _nocheck macros. The only difference between the _nocheck and the _nosleep macros is the call to might_fault(). Since commit 662bbcb2747c ("mm, sched: Allow uaccess in atomic with pagefault_disable()"), __get/put_user() can be used in atomic parts of the code, therefore __get/put_user_inatomic() have become useless. Remove __get_user_inatomic() and __put_user_inatomic(). Signed-off-by: Christophe Leroy Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1e5c895669e8d54a7810b62dc61eb111f33c2c37.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 37 ------------------- .../kernel/hw_breakpoint_constraints.c | 2 +- arch/powerpc/kernel/traps.c | 2 +- 3 files changed, 2 insertions(+), 39 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 250f535c08743c..bf625c777aa05f 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -53,11 +53,6 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) #define __put_user(x, ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) -#define __get_user_inatomic(x, ptr) \ - __get_user_nosleep((x), (ptr), sizeof(*(ptr))) -#define __put_user_inatomic(x, ptr) \ - __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - #ifdef CONFIG_PPC64 #define ___get_user_instr(gu_op, dest, ptr) \ @@ -92,9 +87,6 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) #define __get_user_instr(x, ptr) \ ___get_user_instr(__get_user, x, ptr) -#define __get_user_instr_inatomic(x, ptr) \ - ___get_user_instr(__get_user_inatomic, x, ptr) - extern long __put_user_bad(void); #define __put_user_size(x, ptr, size, retval) \ @@ -141,20 +133,6 @@ __pu_failed: \ __pu_err; \ }) -#define __put_user_nosleep(x, ptr, size) \ -({ \ - long __pu_err; \ - __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __typeof__(*(ptr)) __pu_val = (x); \ - __typeof__(size) __pu_size = (size); \ - \ - __chk_user_ptr(__pu_addr); \ - __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ - \ - __pu_err; \ -}) - - /* * We don't tell gcc that we are accessing memory, but this is OK * because we do not write to any memory gcc knows about, so there @@ -320,21 +298,6 @@ do { \ __gu_err; \ }) -#define __get_user_nosleep(x, ptr, size) \ -({ \ - long __gu_err; \ - __long_type(*(ptr)) __gu_val; \ - __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __typeof__(size) __gu_size = (size); \ - \ - __chk_user_ptr(__gu_addr); \ - __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ - \ - __gu_err; \ -}) - - /* more complex routines */ extern unsigned long __copy_tofrom_user(void __user *to, diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c index 867ee4aa026ad7..675d1f66ab7283 100644 --- a/arch/powerpc/kernel/hw_breakpoint_constraints.c +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -141,7 +141,7 @@ void wp_get_instr_detail(struct pt_regs *regs, struct 
ppc_inst *instr, { struct instruction_op op; - if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip)) + if (__get_user_instr(*instr, (void __user *)regs->nip)) return; analyse_instr(&op, regs, *instr); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 76d17492e0e52d..efba9987069171 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -863,7 +863,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) unsigned long ea, msr, msr_mask; bool swap; - if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip)) + if (__get_user(instr, (unsigned int __user *)regs->nip)) return; /* From 35506a3e2d7c4d93cb564e23471a448cbd98f085 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:44 +0000 Subject: [PATCH 112/302] powerpc/uaccess: Move get_user_instr helpers in asm/inst.h Those helpers use get_user helpers but they don't participate in their implementation, so they do not belong to asm/uaccess.h Move them in asm/inst.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2c6e83581b4fa434aa7cf2fa7714c41e98f57007.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/inst.h | 34 ++++++++++++++++++++++++++++++ arch/powerpc/include/asm/uaccess.h | 34 ------------------------------ 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index cc73c12675721c..19e18af2fac9d4 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -4,6 +4,40 @@ #include +#ifdef CONFIG_PPC64 + +#define ___get_user_instr(gu_op, dest, ptr) \ +({ \ + long __gui_ret = 0; \ + unsigned long __gui_ptr = (unsigned long)ptr; \ + struct ppc_inst __gui_inst; \ + unsigned int __prefix, __suffix; \ + __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr); \ + if (__gui_ret == 0) { \ + if ((__prefix >> 26) == OP_PREFIX) { \ + __gui_ret = gu_op(__suffix, \ + (unsigned int __user *)__gui_ptr + 1); \ + __gui_inst = ppc_inst_prefix(__prefix, \ + __suffix); \ + } else { \ + __gui_inst = ppc_inst(__prefix); \ + } \ + if (__gui_ret == 0) \ + (dest) = __gui_inst; \ + } \ + __gui_ret; \ +}) +#else /* !CONFIG_PPC64 */ +#define ___get_user_instr(gu_op, dest, ptr) \ + gu_op((dest).val, (u32 __user *)(ptr)) +#endif /* CONFIG_PPC64 */ + +#define get_user_instr(x, ptr) \ + ___get_user_instr(get_user, x, ptr) + +#define __get_user_instr(x, ptr) \ + ___get_user_instr(__get_user, x, ptr) + /* * Instruction data type for POWER */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index bf625c777aa05f..8b81535e68d49c 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -53,40 +53,6 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) #define __put_user(x, ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) -#ifdef CONFIG_PPC64 - -#define ___get_user_instr(gu_op, dest, ptr) \ -({ \ - long __gui_ret = 0; \ - unsigned long __gui_ptr = (unsigned long)ptr; \ - struct ppc_inst __gui_inst; \ - unsigned int __prefix, __suffix; \ - __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr); \ - if (__gui_ret == 0) { \ - if ((__prefix >> 26) == OP_PREFIX) { \ - __gui_ret = gu_op(__suffix, \ - (unsigned int __user *)__gui_ptr + 1); \ - __gui_inst = ppc_inst_prefix(__prefix, \ - __suffix); \ - } else { \ - __gui_inst = ppc_inst(__prefix); \ - } \ - if (__gui_ret == 0) \ - (dest) = __gui_inst; \ 
- } \ - __gui_ret; \ -}) -#else /* !CONFIG_PPC64 */ -#define ___get_user_instr(gu_op, dest, ptr) \ - gu_op((dest).val, (u32 __user *)(ptr)) -#endif /* CONFIG_PPC64 */ - -#define get_user_instr(x, ptr) \ - ___get_user_instr(get_user, x, ptr) - -#define __get_user_instr(x, ptr) \ - ___get_user_instr(__get_user, x, ptr) - extern long __put_user_bad(void); #define __put_user_size(x, ptr, size, retval) \ From 111631b5e9dae764754657aad00bd6cd1a805d0d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:45 +0000 Subject: [PATCH 113/302] powerpc/align: Don't use __get_user_instr() on kernel addresses In the old days, when we didn't have kernel userspace access protection and had set_fs(), it was wise to use __get_user() and friends to read kernel memory. Nowadays, get_user() is granting userspace access and is exclusively for userspace access. In alignment exception handler, use probe_kernel_read_inst() instead of __get_user_instr() for reading instructions in kernel. This will allow to remove the is_kernel_addr() check in __get/put_user() in a following patch. Signed-off-by: Christophe Leroy Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d9ecbce00178484e66ca7adec2ff210058037704.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/align.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index f362c99213be26..a97d5f1a390594 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -310,7 +310,12 @@ int fix_alignment(struct pt_regs *regs) */ CHECK_FULL_REGS(regs); - if (unlikely(__get_user_instr(instr, (void __user *)regs->nip))) + if (is_kernel_addr(regs->nip)) + r = probe_kernel_read_inst(&instr, (void *)regs->nip); + else + r = __get_user_instr(instr, (void __user *)regs->nip); + + if (unlikely(r)) return -EFAULT; if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) { /* We don't handle PPC little-endian any more... */ From ed0d9c66f97c6865e87fa6e3631bbc3919a31ad6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:46 +0000 Subject: [PATCH 114/302] powerpc/uaccess: Call might_fault() inconditionaly Commit 6bfd93c32a50 ("powerpc: Fix incorrect might_sleep in __get_user/__put_user on kernel addresses") added a check to not call might_sleep() on kernel addresses. This was to enable the use of __get_user() in the alignment exception handler for any address. Then commit 95156f0051cb ("lockdep, mm: fix might_fault() annotation") added a check of the address space in might_fault(), based on set_fs() logic. But this didn't solve the powerpc alignment exception case as it didn't call set_fs(KERNEL_DS). Nowadays, set_fs() is gone, previous patch fixed the alignment exception handler and __get_user/__put_user are not supposed to be used anymore to read kernel memory. Therefore the is_kernel_addr() check has become useless and can be removed. 
Signed-off-by: Christophe Leroy Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e0a980a4dc7a2551183dd5cb30f46eafdbee390c.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 8b81535e68d49c..ce3dc3f407ac73 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -77,8 +77,7 @@ __pu_failed: \ __typeof__(*(ptr)) __pu_val = (x); \ __typeof__(size) __pu_size = (size); \ \ - if (!is_kernel_addr((unsigned long)__pu_addr)) \ - might_fault(); \ + might_fault(); \ __chk_user_ptr(__pu_addr); \ __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ \ @@ -238,12 +237,12 @@ do { \ __typeof__(size) __gu_size = (size); \ \ __chk_user_ptr(__gu_addr); \ - if (do_allow && !is_kernel_addr((unsigned long)__gu_addr)) \ + if (do_allow) { \ might_fault(); \ - if (do_allow) \ __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ - else \ + } else { \ __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \ + } \ (x) = (__typeof__(*(ptr)))__gu_val; \ \ __gu_err; \ From be15a165796598cd3929ca9aac56ba5ec69e41c1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:47 +0000 Subject: [PATCH 115/302] powerpc/uaccess: Remove __unsafe_put_user_goto() __unsafe_put_user_goto() is just an intermediate layer to __put_user_size_goto() without added value other than doing the __user pointer type checking. Do the __user pointer type checking in __put_user_size_goto() and remove __unsafe_put_user_goto(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b6552149209aebd887a6977272b06a41256bdb9f.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index ce3dc3f407ac73..ea167ac3aaf78e 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -130,23 +130,17 @@ __pu_failed: \ #define __put_user_size_goto(x, ptr, size, label) \ do { \ + __typeof__(*(ptr)) __user *__pus_addr = (ptr); \ + \ switch (size) { \ - case 1: __put_user_asm_goto(x, ptr, label, "stb"); break; \ - case 2: __put_user_asm_goto(x, ptr, label, "sth"); break; \ - case 4: __put_user_asm_goto(x, ptr, label, "stw"); break; \ - case 8: __put_user_asm2_goto(x, ptr, label); break; \ + case 1: __put_user_asm_goto(x, __pus_addr, label, "stb"); break; \ + case 2: __put_user_asm_goto(x, __pus_addr, label, "sth"); break; \ + case 4: __put_user_asm_goto(x, __pus_addr, label, "stw"); break; \ + case 8: __put_user_asm2_goto(x, __pus_addr, label); break; \ default: __put_user_bad(); \ } \ } while (0) -#define __unsafe_put_user_goto(x, ptr, size, label) \ -do { \ - __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __chk_user_ptr(ptr); \ - __put_user_size_goto((x), __pu_addr, (size), label); \ -} while (0) - - extern long __get_user_bad(void); /* @@ -404,7 +398,7 @@ user_write_access_begin(const void __user *ptr, size_t len) } while (0) #define unsafe_put_user(x, p, e) \ - __unsafe_put_user_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e) + __put_user_size_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e) #define unsafe_copy_from_user(d, s, l, e) \ do { \ From 028e15616857add3ba4951f989027675370b0e82 Mon Sep 17 
00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:48 +0000 Subject: [PATCH 116/302] powerpc/uaccess: Remove __chk_user_ptr() in __get/put_user Commit d02f6b7dab82 ("powerpc/uaccess: Evaluate macro arguments once, before user access is allowed") changed the __chk_user_ptr() argument from the passed ptr pointer to the locally declared __gu_addr. But __gu_addr is locally defined as __user so the check is pointless. During kernel build __chk_user_ptr() voids and is only evaluated during sparse checks so it should have been armless to leave the original pointer check there. Nevertheless, this check is indeed redundant with the assignment above which casts the ptr pointer to the local __user __gu_addr. In case of mismatch, sparse will detect it there, so the __check_user_ptr() is not needed anywhere else than in access_ok(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/69f17d75046733b891ab2e668dbf464787cdf598.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index ea167ac3aaf78e..605fa79a6e6fc8 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -78,7 +78,6 @@ __pu_failed: \ __typeof__(size) __pu_size = (size); \ \ might_fault(); \ - __chk_user_ptr(__pu_addr); \ __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ \ __pu_err; \ @@ -197,7 +196,6 @@ extern long __get_user_bad(void); #define __get_user_size_allowed(x, ptr, size, retval) \ do { \ retval = 0; \ - __chk_user_ptr(ptr); \ if (size > sizeof(x)) \ (x) = __get_user_bad(); \ switch (size) { \ @@ -230,7 +228,6 @@ do { \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __typeof__(size) __gu_size = (size); \ \ - __chk_user_ptr(__gu_addr); \ if (do_allow) { \ might_fault(); \ __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ From 9975f852ce1bf041a1a81bf882e29ee7a3b78ca6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:49 +0000 Subject: [PATCH 117/302] powerpc/uaccess: Remove calls to __get_user_bad() and __put_user_bad() __get_user_bad() and __put_user_bad() are functions that are declared but not defined, in order to make the link fail in case they are called. Nowadays, we have BUILD_BUG() and BUILD_BUG_ON() for that, and they have the advantage to break the build earlier as it breaks it at compile time instead of link time. 
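(For readers less familiar with the two mechanisms, a minimal sketch of the difference follows; it is illustration only, not part of the patch, and the helper names are invented. Both rely on the size being a compile-time constant, as sizeof(*(ptr)) always is, so the default branch is normally optimised away.)

/* Needs <linux/build_bug.h> for BUILD_BUG(). */
extern long __demo_bad_size(void);              /* declared, never defined */

static __always_inline long demo_check_linktime(size_t size)
{
        switch (size) {
        case 1: case 2: case 4: case 8:
                return 0;
        default:
                /* compiles fine; fails only when the linker cannot
                 * resolve __demo_bad_size()
                 */
                return __demo_bad_size();
        }
}

static __always_inline long demo_check_compiletime(size_t size)
{
        switch (size) {
        case 1: case 2: case 4: case 8:
                return 0;
        default:
                BUILD_BUG();                    /* rejected while compiling */
                return 0;
        }
}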
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d7d839e994f49fae4ff7b70fac72bd951272436b.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 605fa79a6e6fc8..47a454d19351df 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -53,8 +53,6 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) #define __put_user(x, ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) -extern long __put_user_bad(void); - #define __put_user_size(x, ptr, size, retval) \ do { \ __label__ __pu_failed; \ @@ -136,12 +134,10 @@ do { \ case 2: __put_user_asm_goto(x, __pus_addr, label, "sth"); break; \ case 4: __put_user_asm_goto(x, __pus_addr, label, "stw"); break; \ case 8: __put_user_asm2_goto(x, __pus_addr, label); break; \ - default: __put_user_bad(); \ + default: BUILD_BUG(); \ } \ } while (0) -extern long __get_user_bad(void); - /* * This does an atomic 128 byte aligned load from userspace. * Upto caller to do enable_kernel_vmx() before calling! @@ -196,14 +192,13 @@ extern long __get_user_bad(void); #define __get_user_size_allowed(x, ptr, size, retval) \ do { \ retval = 0; \ - if (size > sizeof(x)) \ - (x) = __get_user_bad(); \ + BUILD_BUG_ON(size > sizeof(x)); \ switch (size) { \ case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break; \ case 2: __get_user_asm(x, (u16 __user *)ptr, retval, "lhz"); break; \ case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break; \ case 8: __get_user_asm2(x, (u64 __user *)ptr, retval); break; \ - default: (x) = __get_user_bad(); \ + default: BUILD_BUG(); \ } \ } while (0) From f904c22f2a9fb09fe705efdedbe4af9a30bdf633 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:50 +0000 Subject: [PATCH 118/302] powerpc/uaccess: Split out __get_user_nocheck() One part of __get_user_nocheck() is used for __get_user(), the other part for unsafe_get_user(). Move the part dedicated to unsafe_get_user() in it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/618fe2e0626b308a5a063d5baac827b968e85c32.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 47a454d19351df..2395bdc991bdf4 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -49,7 +49,7 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) #define __get_user(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true) + __get_user_nocheck((x), (ptr), sizeof(*(ptr))) #define __put_user(x, ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) @@ -216,19 +216,15 @@ do { \ #define __long_type(x) \ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) -#define __get_user_nocheck(x, ptr, size, do_allow) \ +#define __get_user_nocheck(x, ptr, size) \ ({ \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __typeof__(size) __gu_size = (size); \ \ - if (do_allow) { \ - might_fault(); \ - __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ - } else { \ - __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \ - } \ + might_fault(); \ + __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ \ __gu_err; \ @@ -385,8 +381,14 @@ user_write_access_begin(const void __user *ptr, size_t len) #define user_write_access_end prevent_current_write_to_user #define unsafe_get_user(x, p, e) do { \ - if (unlikely(__get_user_nocheck((x), (p), sizeof(*(p)), false)))\ - goto e; \ + long __gu_err; \ + __long_type(*(p)) __gu_val; \ + __typeof__(*(p)) __user *__gu_addr = (p); \ + \ + __get_user_size_allowed(__gu_val, __gu_addr, sizeof(*(p)), __gu_err); \ + if (__gu_err) \ + goto e; \ + (x) = (__typeof__(*(p)))__gu_val; \ } while (0) #define unsafe_put_user(x, p, e) \ From 17f8c0bc21bbb7d1fe729c7f656924a6ea72079b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:51 +0000 Subject: [PATCH 119/302] powerpc/uaccess: Rename __get/put_user_check/nocheck __get_user_check() becomes get_user() __put_user_check() becomes put_user() __get_user_nocheck() becomes __get_user() __put_user_nocheck() becomes __put_user() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/41d7e45f4733f0e61e63824e4865b4e049db74d6.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 2395bdc991bdf4..065af18e4b568e 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -43,16 +43,6 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) * exception handling means that it's no longer "just"...) 
* */ -#define get_user(x, ptr) \ - __get_user_check((x), (ptr), sizeof(*(ptr))) -#define put_user(x, ptr) \ - __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - -#define __get_user(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr))) -#define __put_user(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - #define __put_user_size(x, ptr, size, retval) \ do { \ __label__ __pu_failed; \ @@ -68,12 +58,12 @@ __pu_failed: \ prevent_write_to_user(ptr, size); \ } while (0) -#define __put_user_nocheck(x, ptr, size) \ +#define __put_user(x, ptr) \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __typeof__(*(ptr)) __pu_val = (x); \ - __typeof__(size) __pu_size = (size); \ + __typeof__(*(ptr)) __pu_val = (__typeof__(*(ptr)))(x); \ + __typeof__(sizeof(*(ptr))) __pu_size = sizeof(*(ptr)); \ \ might_fault(); \ __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ @@ -81,12 +71,12 @@ __pu_failed: \ __pu_err; \ }) -#define __put_user_check(x, ptr, size) \ +#define put_user(x, ptr) \ ({ \ long __pu_err = -EFAULT; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __typeof__(*(ptr)) __pu_val = (x); \ - __typeof__(size) __pu_size = (size); \ + __typeof__(*(ptr)) __pu_val = (__typeof__(*(ptr)))(x); \ + __typeof__(sizeof(*(ptr))) __pu_size = sizeof(*(ptr)); \ \ might_fault(); \ if (access_ok(__pu_addr, __pu_size)) \ @@ -216,12 +206,12 @@ do { \ #define __long_type(x) \ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) -#define __get_user_nocheck(x, ptr, size) \ +#define __get_user(x, ptr) \ ({ \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __typeof__(size) __gu_size = (size); \ + __typeof__(sizeof(*(ptr))) __gu_size = sizeof(*(ptr)); \ \ might_fault(); \ __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ @@ -230,12 +220,12 @@ do { \ __gu_err; \ }) -#define __get_user_check(x, ptr, size) \ +#define get_user(x, ptr) \ ({ \ long __gu_err = -EFAULT; \ __long_type(*(ptr)) __gu_val = 0; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __typeof__(size) __gu_size = (size); \ + __typeof__(sizeof(*(ptr))) __gu_size = sizeof(*(ptr)); \ \ might_fault(); \ if (access_ok(__gu_addr, __gu_size)) \ From e72fcdb26cde72985c418b39f72ecaa222e1f4d5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:52 +0000 Subject: [PATCH 120/302] powerpc/uaccess: Refactor get/put_user() and __get/put_user() Make get_user() do the access_ok() check then call __get_user(). Make put_user() do the access_ok() check then call __put_user(). Then embed __get_user_size() and __put_user_size() in __get_user() and __put_user(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/eebc554f6a81f570c46ea3551000ff5b886e4faa.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 66 +++++++++++------------------- 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 065af18e4b568e..9612889a7a4230 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -43,21 +43,6 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) * exception handling means that it's no longer "just"...) 
* */ -#define __put_user_size(x, ptr, size, retval) \ -do { \ - __label__ __pu_failed; \ - \ - retval = 0; \ - allow_write_to_user(ptr, size); \ - __put_user_size_goto(x, ptr, size, __pu_failed); \ - prevent_write_to_user(ptr, size); \ - break; \ - \ -__pu_failed: \ - retval = -EFAULT; \ - prevent_write_to_user(ptr, size); \ -} while (0) - #define __put_user(x, ptr) \ ({ \ long __pu_err; \ @@ -66,23 +51,29 @@ __pu_failed: \ __typeof__(sizeof(*(ptr))) __pu_size = sizeof(*(ptr)); \ \ might_fault(); \ - __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ + do { \ + __label__ __pu_failed; \ + \ + allow_write_to_user(__pu_addr, __pu_size); \ + __put_user_size_goto(__pu_val, __pu_addr, __pu_size, __pu_failed); \ + prevent_write_to_user(__pu_addr, __pu_size); \ + __pu_err = 0; \ + break; \ + \ +__pu_failed: \ + prevent_write_to_user(__pu_addr, __pu_size); \ + __pu_err = -EFAULT; \ + } while (0); \ \ __pu_err; \ }) #define put_user(x, ptr) \ ({ \ - long __pu_err = -EFAULT; \ - __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __typeof__(*(ptr)) __pu_val = (__typeof__(*(ptr)))(x); \ - __typeof__(sizeof(*(ptr))) __pu_size = sizeof(*(ptr)); \ + __typeof__(*(ptr)) __user *_pu_addr = (ptr); \ \ - might_fault(); \ - if (access_ok(__pu_addr, __pu_size)) \ - __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ - \ - __pu_err; \ + access_ok(_pu_addr, sizeof(*(ptr))) ? \ + __put_user(x, _pu_addr) : -EFAULT; \ }) /* @@ -192,13 +183,6 @@ do { \ } \ } while (0) -#define __get_user_size(x, ptr, size, retval) \ -do { \ - allow_read_from_user(ptr, size); \ - __get_user_size_allowed(x, ptr, size, retval); \ - prevent_read_from_user(ptr, size); \ -} while (0) - /* * This is a type: either unsigned long, if the argument fits into * that type, or otherwise unsigned long long. @@ -214,7 +198,9 @@ do { \ __typeof__(sizeof(*(ptr))) __gu_size = sizeof(*(ptr)); \ \ might_fault(); \ - __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ + allow_read_from_user(__gu_addr, __gu_size); \ + __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \ + prevent_read_from_user(__gu_addr, __gu_size); \ (x) = (__typeof__(*(ptr)))__gu_val; \ \ __gu_err; \ @@ -222,17 +208,11 @@ do { \ #define get_user(x, ptr) \ ({ \ - long __gu_err = -EFAULT; \ - __long_type(*(ptr)) __gu_val = 0; \ - __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __typeof__(sizeof(*(ptr))) __gu_size = sizeof(*(ptr)); \ - \ - might_fault(); \ - if (access_ok(__gu_addr, __gu_size)) \ - __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + __typeof__(*(ptr)) __user *_gu_addr = (ptr); \ \ - __gu_err; \ + access_ok(_gu_addr, sizeof(*(ptr))) ? \ + __get_user(x, _gu_addr) : \ + ((x) = (__force __typeof__(*(ptr)))0, -EFAULT); \ }) /* more complex routines */ From 035785ab2826beb43cfa65a2df37d60074915a4d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:53 +0000 Subject: [PATCH 121/302] powerpc/uaccess: Introduce __get_user_size_goto() We have got two places doing a goto based on the result of __get_user_size_allowed(). Refactor that into __get_user_size_goto(). 
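(A usage sketch, not from the patch, of the goto-style interface that __get_user_size_goto() now also serves through __get_kernel_nofault(): the caller supplies a local error label instead of collecting a return code. The helper below is hypothetical and simplified; the usual entry point is copy_from_kernel_nofault(), which additionally disables page faults around the access.)

static int demo_peek_kernel_int(const int *src, int *val)
{
        /* on a fault, the fixup jumps straight to the Efault label */
        __get_kernel_nofault(val, src, int, Efault);
        return 0;

Efault:
        return -EFAULT;
}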
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/def8a39289e02653cfb1583b3b19837de9efed3a.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 9612889a7a4230..797a1444988bc5 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -183,6 +183,15 @@ do { \ } \ } while (0) +#define __get_user_size_goto(x, ptr, size, label) \ +do { \ + long __gus_retval; \ + \ + __get_user_size_allowed(x, ptr, size, __gus_retval); \ + if (__gus_retval) \ + goto label; \ +} while (0) + /* * This is a type: either unsigned long, if the argument fits into * that type, or otherwise unsigned long long. @@ -351,13 +360,10 @@ user_write_access_begin(const void __user *ptr, size_t len) #define user_write_access_end prevent_current_write_to_user #define unsafe_get_user(x, p, e) do { \ - long __gu_err; \ __long_type(*(p)) __gu_val; \ __typeof__(*(p)) __user *__gu_addr = (p); \ \ - __get_user_size_allowed(__gu_val, __gu_addr, sizeof(*(p)), __gu_err); \ - if (__gu_err) \ - goto e; \ + __get_user_size_goto(__gu_val, __gu_addr, sizeof(*(p)), e); \ (x) = (__typeof__(*(p)))__gu_val; \ } while (0) @@ -409,14 +415,8 @@ do { \ #define HAVE_GET_KERNEL_NOFAULT #define __get_kernel_nofault(dst, src, type, err_label) \ -do { \ - int __kr_err; \ - \ - __get_user_size_allowed(*((type *)(dst)), (__force type __user *)(src),\ - sizeof(type), __kr_err); \ - if (unlikely(__kr_err)) \ - goto err_label; \ -} while (0) + __get_user_size_goto(*((type *)(dst)), \ + (__force type __user *)(src), sizeof(type), err_label) #define __put_kernel_nofault(dst, src, type, err_label) \ __put_user_size_goto(*((type *)(src)), \ From 5cd29b1fd3e8f2b45fe6d011588d832417defe31 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 17:46:54 +0000 Subject: [PATCH 122/302] powerpc/uaccess: Use asm goto for get_user when compiler supports it clang 11 and future GCC are supporting asm goto with outputs. Use it to implement get_user in order to get better generated code. 
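(A schematic example of the compiler feature itself, separate from the powerpc implementation below; illustration only, requiring GCC 11+ or clang 11+, which is what CONFIG_CC_HAS_ASM_GOTO_OUTPUT expresses. The asm may both write an output operand and branch to a C label, so no error variable has to be threaded through. The asm body is deliberately left empty here; in the real code the template holds the user load plus an extable entry whose fixup jumps to the label.)

static inline bool demo_asm_goto_output(int *out, int in)
{
        asm goto("" /* real code: a load + EX_TABLE fixup */
                 : "=r" (*out)
                 : "0" (in)
                 :
                 : fail);
        return true;            /* fall-through path: the output is valid */

fail:
        return false;           /* reached only via the asm's jump/fixup */
}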
Note that clang requires to set x in the default branch of __get_user_size_goto() otherwise is compliant about x not being initialised :puzzled: Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/403745b5aaa1b315bb4e8e46c1ba949e77eecec0.1615398265.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 55 ++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 797a1444988bc5..77d837b16e4d55 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -136,6 +136,59 @@ do { \ : "=r" (err) \ : "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err)) +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define __get_user_asm_goto(x, addr, label, op) \ + asm_volatile_goto( \ + "1: "op"%U1%X1 %0, %1 # get_user\n" \ + EX_TABLE(1b, %l2) \ + : "=r" (x) \ + : "m"UPD_CONSTR (*addr) \ + : \ + : label) + +#ifdef __powerpc64__ +#define __get_user_asm2_goto(x, addr, label) \ + __get_user_asm_goto(x, addr, label, "ld") +#else /* __powerpc64__ */ +#define __get_user_asm2_goto(x, addr, label) \ + asm_volatile_goto( \ + "1: lwz%X1 %0, %1\n" \ + "2: lwz%X1 %L0, %L1\n" \ + EX_TABLE(1b, %l2) \ + EX_TABLE(2b, %l2) \ + : "=r" (x) \ + : "m" (*addr) \ + : \ + : label) +#endif /* __powerpc64__ */ + +#define __get_user_size_goto(x, ptr, size, label) \ +do { \ + BUILD_BUG_ON(size > sizeof(x)); \ + switch (size) { \ + case 1: __get_user_asm_goto(x, (u8 __user *)ptr, label, "lbz"); break; \ + case 2: __get_user_asm_goto(x, (u16 __user *)ptr, label, "lhz"); break; \ + case 4: __get_user_asm_goto(x, (u32 __user *)ptr, label, "lwz"); break; \ + case 8: __get_user_asm2_goto(x, (u64 __user *)ptr, label); break; \ + default: x = 0; BUILD_BUG(); \ + } \ +} while (0) + +#define __get_user_size_allowed(x, ptr, size, retval) \ +do { \ + __label__ __gus_failed; \ + \ + __get_user_size_goto(x, ptr, size, __gus_failed); \ + retval = 0; \ + break; \ +__gus_failed: \ + x = 0; \ + retval = -EFAULT; \ +} while (0) + +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + #define __get_user_asm(x, addr, err, op) \ __asm__ __volatile__( \ "1: "op"%U2%X2 %1, %2 # get_user\n" \ @@ -192,6 +245,8 @@ do { \ goto label; \ } while (0) +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + /* * This is a type: either unsigned long, if the argument fits into * that type, or otherwise unsigned long long. From fb05121fd6a20f0830ff2a4420c51af6ca4ac6e7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Mar 2021 11:06:50 +0000 Subject: [PATCH 123/302] signal: Add unsafe_get_compat_sigset() In the same way as commit 14026b94ccfe ("signal: Add unsafe_put_compat_sigset()"), this time add unsafe_get_compat_sigset() macro which is the 'unsafe' version of get_compat_sigset() For the bigendian, use unsafe_get_user() directly to avoid intermediate copy through the stack. For the littleendian, use a straight unsafe_copy_from_user(). This commit adds the generic fallback for unsafe_copy_from_user(). Architectures wanting to use unsafe_get_compat_sigset() have to make sure they have their own unsafe_copy_from_user(). 
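(Illustration only, not from the patch: how one 64-bit sigset word is rebuilt from its two 32-bit compat halves. On little-endian the result has exactly the byte layout of the compat pair, so a single bulk copy suffices; on big-endian it does not, hence the word-by-word unsafe_get_user() calls in the macro added below.)

#include <stdint.h>

static uint64_t demo_sigset_word(const uint32_t compat[2])
{
        uint32_t low  = compat[0];      /* signals  1..32 of this word */
        uint32_t high = compat[1];      /* signals 33..64 of this word */

        return (uint64_t)low | ((uint64_t)high << 32);
}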
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b05bf434ee13c76bc9df5f02653a10db5e7b54e5.1616151715.git.christophe.leroy@csgroup.eu --- include/linux/compat.h | 35 +++++++++++++++++++++++++++++++++++ include/linux/uaccess.h | 1 + 2 files changed, 36 insertions(+) diff --git a/include/linux/compat.h b/include/linux/compat.h index 6e65be75360321..5112c3e35782fa 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -465,6 +465,34 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, unsafe_put_user(__s->sig[0], &__c->sig[0], label); \ } \ } while (0) + +#define unsafe_get_compat_sigset(set, compat, label) do { \ + const compat_sigset_t __user *__c = compat; \ + compat_sigset_word hi, lo; \ + sigset_t *__s = set; \ + \ + switch (_NSIG_WORDS) { \ + case 4: \ + unsafe_get_user(lo, &__c->sig[7], label); \ + unsafe_get_user(hi, &__c->sig[6], label); \ + __s->sig[3] = hi | (((long)lo) << 32); \ + fallthrough; \ + case 3: \ + unsafe_get_user(lo, &__c->sig[5], label); \ + unsafe_get_user(hi, &__c->sig[4], label); \ + __s->sig[2] = hi | (((long)lo) << 32); \ + fallthrough; \ + case 2: \ + unsafe_get_user(lo, &__c->sig[3], label); \ + unsafe_get_user(hi, &__c->sig[2], label); \ + __s->sig[1] = hi | (((long)lo) << 32); \ + fallthrough; \ + case 1: \ + unsafe_get_user(lo, &__c->sig[1], label); \ + unsafe_get_user(hi, &__c->sig[0], label); \ + __s->sig[0] = hi | (((long)lo) << 32); \ + } \ +} while (0) #else #define unsafe_put_compat_sigset(compat, set, label) do { \ compat_sigset_t __user *__c = compat; \ @@ -472,6 +500,13 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, \ unsafe_copy_to_user(__c, __s, sizeof(*__c), label); \ } while (0) + +#define unsafe_get_compat_sigset(set, compat, label) do { \ + const compat_sigset_t __user *__c = compat; \ + sigset_t *__s = set; \ + \ + unsafe_copy_from_user(__s, __c, sizeof(*__c), label); \ +} while (0) #endif extern int compat_ptrace_request(struct task_struct *child, diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index c7c6e8b8344d49..c05e903cef02a3 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -397,6 +397,7 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count); #define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e) #define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e) #define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e) +#define unsafe_copy_from_user(d,s,l,e) unsafe_op_wrap(__copy_from_user(d,s,l),e) static inline unsigned long user_access_save(void) { return 0UL; } static inline void user_access_restore(unsigned long flags) { } #endif From c1cc1570bc8d94f288060f262f11be8f7672578c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Mar 2021 11:06:51 +0000 Subject: [PATCH 124/302] powerpc/uaccess: Also perform 64 bits copies in unsafe_copy_from_user() on ppc32 Similarly to commit 5cf773fc8f37 ("powerpc/uaccess: Also perform 64 bits copies in unsafe_copy_to_user() on ppc32") ppc32 has an efficiant 64 bits unsafe_get_user(), so also use it in order to unroll loops more. 
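(A plain C sketch of the copy pattern this enables, illustration only, not the kernel macro: move 8 bytes at a time, then finish with a 4/2/1-byte tail, the same order unsafe_copy_from_user() uses once ppc32 can also issue the 8-byte accesses.)

#include <string.h>

static void demo_copy_chunked(void *dst, const void *src, size_t len)
{
        size_t i;

        for (i = 0; i + 8 <= len; i += 8)       /* 64-bit chunks */
                memcpy((char *)dst + i, (const char *)src + i, 8);
        if (len & 4) {
                memcpy((char *)dst + i, (const char *)src + i, 4);
                i += 4;
        }
        if (len & 2) {
                memcpy((char *)dst + i, (const char *)src + i, 2);
                i += 2;
        }
        if (len & 1)
                memcpy((char *)dst + i, (const char *)src + i, 1);
}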
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/308e65d9237a14e8c0e3b22919fcf0b5e5592608.1616151715.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 77d837b16e4d55..a4e791bcd3fe24 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -432,9 +432,9 @@ do { \ size_t _len = (l); \ int _i; \ \ - for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long)) \ - unsafe_get_user(*(long *)(_dst + _i), (long __user *)(_src + _i), e); \ - if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) { \ + for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64)) \ + unsafe_get_user(*(u64 *)(_dst + _i), (u64 __user *)(_src + _i), e); \ + if (_len & 4) { \ unsafe_get_user(*(u32 *)(_dst + _i), (u32 __user *)(_src + _i), e); \ _i += 4; \ } \ From 7c11f8893a76ac4e86c07f4b57371d5fa593627f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Mar 2021 11:06:52 +0000 Subject: [PATCH 125/302] powerpc/signal: Add unsafe_copy_ck{fpr/vsx}_from_user Add unsafe_copy_ckfpr_from_user() and unsafe_copy_ckvsx_from_user() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1040687aa27553d19f749f7fb48f0c07af98ee2d.1616151715.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 1393876f38143f..a5152ff3c52f3b 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -100,6 +100,26 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); unsafe_put_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \ &buf[i], label);\ } while (0) + +#define unsafe_copy_ckfpr_from_user(task, from, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)from; \ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1 ; i++) \ + unsafe_get_user(__t->thread.TS_CKFPR(i), &buf[i], label);\ + unsafe_get_user(__t->thread.ckfp_state.fpscr, &buf[i], failed); \ +} while (0) + +#define unsafe_copy_ckvsx_from_user(task, from, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)from; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + unsafe_get_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label); \ +} while (0) #endif #elif defined(CONFIG_PPC_FPU_REGS) From f918a81e209f24acb45cd935bcfb78d2c024f6a1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Mar 2021 11:06:53 +0000 Subject: [PATCH 126/302] powerpc/signal32: Rename save_user_regs_unsafe() and save_general_regs_unsafe() Convention is to prefix functions with __unsafe_ instead of suffixing it with _unsafe. Rename save_user_regs_unsafe() and save_general_regs_unsafe() accordingly, that is respectively __unsafe_save_general_regs() and __unsafe_save_user_regs(). Suggested-by: Christopher M. 
Riedl Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8cef43607e5b35a7fd0829dec812d88beb570df2.1616151715.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index c505b444a6131e..3b78748d6d85b3 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -94,7 +94,7 @@ static inline int get_sigset_t(sigset_t *set, #define from_user_ptr(p) compat_ptr(p) static __always_inline int -save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) +__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int val, i; @@ -151,7 +151,7 @@ static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) #define from_user_ptr(p) ((void __user *)(p)) static __always_inline int -save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) +__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { WARN_ON(!FULL_REGS(regs)); unsafe_copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE, failed); @@ -177,7 +177,7 @@ static inline int restore_general_regs(struct pt_regs *regs, #endif #define unsafe_save_general_regs(regs, frame, label) do { \ - if (save_general_regs_unsafe(regs, frame)) \ + if (__unsafe_save_general_regs(regs, frame)) \ goto label; \ } while (0) @@ -260,8 +260,8 @@ static void prepare_save_user_regs(int ctx_has_vsx_region) #endif } -static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int ctx_has_vsx_region) +static int __unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, int ctx_has_vsx_region) { unsigned long msr = regs->msr; @@ -338,7 +338,7 @@ static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *f } #define unsafe_save_user_regs(regs, frame, tm_frame, has_vsx, label) do { \ - if (save_user_regs_unsafe(regs, frame, tm_frame, has_vsx)) \ + if (__unsafe_save_user_regs(regs, frame, tm_frame, has_vsx)) \ goto label; \ } while (0) @@ -350,7 +350,7 @@ static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *f * We also save the transactional registers to a second ucontext in the * frame. * - * See save_user_regs_unsafe() and signal_64.c:setup_tm_sigcontexts(). + * See __unsafe_save_user_regs() and signal_64.c:setup_tm_sigcontexts(). */ static void prepare_save_tm_user_regs(void) { @@ -441,7 +441,7 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in save_user_regs_unsafe(). + * simply the same as in __unsafe_save_user_regs(). 
*/ if (current->thread.used_spe) { unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, From ca9e1605cdd9473a0eb4d6da238d2524be12591a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Mar 2021 11:06:54 +0000 Subject: [PATCH 127/302] powerpc/signal32: Remove ifdefery in middle of if/else in sigreturn() In the same spirit as commit f1cf4f93de2f ("powerpc/signal32: Remove ifdefery in middle of if/else") MSR_TM_ACTIVE() is always defined and returns always 0 when CONFIG_PPC_TRANSACTIONAL_MEM is not selected, so the awful ifdefery in the middle of an if/else can be removed. Make 'msr_hi' a 'long long' to avoid build failure on PPC32 due to the 32 bits left shift. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a4b48b2f0be1ef13fc8e57452b7f8350da28d521.1616151715.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 3b78748d6d85b3..8dfe4fe777069d 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -740,6 +740,12 @@ static long restore_tm_user_regs(struct pt_regs *regs, return 0; } +#else +static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *sr, + struct mcontext __user *tm_sr) +{ + return 0; +} #endif #ifdef CONFIG_PPC64 @@ -1317,10 +1323,9 @@ SYSCALL_DEFINE0(sigreturn) struct mcontext __user *sr; void __user *addr; sigset_t set; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - struct mcontext __user *mcp, *tm_mcp; - unsigned long msr_hi; -#endif + struct mcontext __user *mcp; + struct mcontext __user *tm_mcp = NULL; + unsigned long long msr_hi = 0; /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -1343,19 +1348,18 @@ SYSCALL_DEFINE0(sigreturn) #endif set_current_blocked(&set); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM mcp = (struct mcontext __user *)&sf->mctx; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_mcp = (struct mcontext __user *)&sf->mctx_transact; if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR])) goto badframe; +#endif if (MSR_TM_ACTIVE(msr_hi<<32)) { if (!cpu_has_feature(CPU_FTR_TM)) goto badframe; if (restore_tm_user_regs(regs, mcp, tm_mcp)) goto badframe; - } else -#endif - { + } else { sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); addr = sr; if (!access_ok(sr, sizeof(*sr)) From 362471b3192e4184fff5fedee1ea20bdf637a0c8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Mar 2021 11:06:55 +0000 Subject: [PATCH 128/302] powerpc/signal32: Perform access_ok() inside restore_user_regs() In preparation of using user_access_begin/end in restore_user_regs(), move the access_ok() inside the function. It makes no difference as the behaviour on a failed access_ok() is the same as on failed restore_user_regs(). 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c106eb2f37c3040f1fd38b40e50c670feb7cb835.1616151715.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 8dfe4fe777069d..e2b1d2a0abadf2 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -492,6 +492,8 @@ static long restore_user_regs(struct pt_regs *regs, int i; #endif + if (!access_ok(sr, sizeof(*sr))) + return 1; /* * restore general registers but not including MSR or SOFTE. Also * take care of keeping r2 (TLS) intact if not a signal @@ -963,13 +965,10 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int if (__get_user(cmcp, &ucp->uc_regs)) return -EFAULT; mcp = (struct mcontext __user *)(u64)cmcp; - /* no need to check access_ok(mcp), since mcp < 4GB */ } #else if (__get_user(mcp, &ucp->uc_regs)) return -EFAULT; - if (!access_ok(mcp, sizeof(*mcp))) - return -EFAULT; #endif set_current_blocked(&set); if (restore_user_regs(regs, mcp, sig)) @@ -1362,8 +1361,7 @@ SYSCALL_DEFINE0(sigreturn) } else { sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); addr = sr; - if (!access_ok(sr, sizeof(*sr)) - || restore_user_regs(regs, sr, 1)) + if (restore_user_regs(regs, sr, 1)) goto badframe; } From 036fc2cb1dc2245c2ea7d2f03c7af80417b6310c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Mar 2021 11:06:56 +0000 Subject: [PATCH 129/302] powerpc/signal32: Reorder user reads in restore_tm_user_regs() In restore_tm_user_regs(), regroup the reads from 'sr' and the ones from 'tm_sr' together in order to allow two block user accesses in following patch. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7c518b9a4c8e5ae9a3bfb647bc8b20bf820233af.1616151715.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 49 +++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index e2b1d2a0abadf2..088c838530266f 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -607,8 +607,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, * TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR * were set by the signal delivery. 
*/ - err = restore_general_regs(regs, tm_sr); - err |= restore_general_regs(¤t->thread.ckpt_regs, sr); + err = restore_general_regs(¤t->thread.ckpt_regs, sr); err |= __get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP]); @@ -624,9 +623,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, if (msr & MSR_VEC) { /* restore altivec registers from the stack */ if (__copy_from_user(¤t->thread.ckvr_state, &sr->mc_vregs, - sizeof(sr->mc_vregs)) || - __copy_from_user(¤t->thread.vr_state, - &tm_sr->mc_vregs, sizeof(sr->mc_vregs))) return 1; current->thread.used_vr = true; @@ -639,9 +635,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, /* Always get VRSAVE back */ if (__get_user(current->thread.ckvrsave, - (u32 __user *)&sr->mc_vregs[32]) || - __get_user(current->thread.vrsave, - (u32 __user *)&tm_sr->mc_vregs[32])) + (u32 __user *)&sr->mc_vregs[32])) return 1; if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, current->thread.ckvrsave); @@ -649,8 +643,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); - if (copy_fpr_from_user(current, &sr->mc_fregs) || - copy_ckfpr_from_user(current, &tm_sr->mc_fregs)) + if (copy_fpr_from_user(current, &sr->mc_fregs)) return 1; #ifdef CONFIG_VSX @@ -660,8 +653,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, * Restore altivec registers from the stack to a local * buffer, then write this out to the thread_struct */ - if (copy_vsx_from_user(current, &tm_sr->mc_vsregs) || - copy_ckvsx_from_user(current, &sr->mc_vsregs)) + if (copy_ckvsx_from_user(current, &sr->mc_vsregs)) return 1; current->thread.used_vsr = true; } else if (current->thread.used_vsr) @@ -690,6 +682,39 @@ static long restore_tm_user_regs(struct pt_regs *regs, return 1; #endif /* CONFIG_SPE */ + err = restore_general_regs(regs, tm_sr); + if (err) + return 1; + +#ifdef CONFIG_ALTIVEC + /* restore altivec registers from the stack */ + if (msr & MSR_VEC) + if (__copy_from_user(¤t->thread.vr_state, + &tm_sr->mc_vregs, + sizeof(sr->mc_vregs))) + return 1; + + /* Always get VRSAVE back */ + if (__get_user(current->thread.vrsave, + (u32 __user *)&tm_sr->mc_vregs[32])) + return 1; +#endif /* CONFIG_ALTIVEC */ + + if (copy_ckfpr_from_user(current, &tm_sr->mc_fregs)) + return 1; + +#ifdef CONFIG_VSX + if (msr & MSR_VSX) { + /* + * Restore altivec registers from the stack to a local + * buffer, then write this out to the thread_struct + */ + if (copy_vsx_from_user(current, &tm_sr->mc_vsregs)) + return 1; + current->thread.used_vsr = true; + } +#endif /* CONFIG_VSX */ + /* Get the top half of the MSR from the user context */ if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) return 1; From 627b72bee84d6652e0af26617e71ce2b3c18fcd5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Mar 2021 11:06:57 +0000 Subject: [PATCH 130/302] powerpc/signal32: Convert restore_[tm]_user_regs() to user access block Convert restore_user_regs() and restore_tm_user_regs() to use user_access_read_begin/end blocks. 
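(A minimal sketch of the pattern the conversion introduces; the helper below is hypothetical, not from the patch. One read window is opened with user_read_access_begin(), only unsafe_*() accessors run inside it, and the window is closed on both the success path and the failure path.)

static int demo_read_pair(u32 *a, u32 *b, const u32 __user *src)
{
        if (!user_read_access_begin(src, 2 * sizeof(u32)))
                return -EFAULT;

        unsafe_get_user(*a, &src[0], failed);
        unsafe_get_user(*b, &src[1], failed);

        user_read_access_end();
        return 0;

failed:
        user_read_access_end();
        return -EFAULT;
}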
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/181adf15a6f644efcd1aeafb355f3578ff1b6bc5.1616151715.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ptrace.h | 2 +- arch/powerpc/kernel/signal_32.c | 141 +++++++++++++++--------------- 2 files changed, 72 insertions(+), 71 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index f10498e1b3f617..95600f3a6523a5 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -245,7 +245,7 @@ static inline bool trap_norestart(struct pt_regs *regs) return regs->trap & 0x10; } -static inline void set_trap_norestart(struct pt_regs *regs) +static __always_inline void set_trap_norestart(struct pt_regs *regs) { regs->trap |= 0x10; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 088c838530266f..0b1a6f53e55394 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -116,8 +116,8 @@ __unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) return 1; } -static inline int restore_general_regs(struct pt_regs *regs, - struct mcontext __user *sr) +static __always_inline int +__unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int i; @@ -125,10 +125,12 @@ static inline int restore_general_regs(struct pt_regs *regs, for (i = 0; i <= PT_RESULT; i++) { if ((i == PT_MSR) || (i == PT_SOFTE)) continue; - if (__get_user(gregs[i], &sr->mc_gregs[i])) - return -EFAULT; + unsafe_get_user(gregs[i], &sr->mc_gregs[i], failed); } return 0; + +failed: + return 1; } #else /* CONFIG_PPC64 */ @@ -161,18 +163,20 @@ __unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) return 1; } -static inline int restore_general_regs(struct pt_regs *regs, - struct mcontext __user *sr) +static __always_inline +int __unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr) { /* copy up to but not including MSR */ - if (__copy_from_user(regs, &sr->mc_gregs, - PT_MSR * sizeof(elf_greg_t))) - return -EFAULT; + unsafe_copy_from_user(regs, &sr->mc_gregs, PT_MSR * sizeof(elf_greg_t), failed); + /* copy from orig_r3 (the word after the MSR) up to the end */ - if (__copy_from_user(®s->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3], - GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t))) - return -EFAULT; + unsafe_copy_from_user(®s->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3], + GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t), failed); + return 0; + +failed: + return 1; } #endif @@ -181,6 +185,11 @@ static inline int restore_general_regs(struct pt_regs *regs, goto label; \ } while (0) +#define unsafe_restore_general_regs(regs, frame, label) do { \ + if (__unsafe_restore_general_regs(regs, frame)) \ + goto label; \ +} while (0) + /* * When we have signals to deliver, we set up on the * user stack, going down from the original stack pointer: @@ -485,14 +494,13 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user static long restore_user_regs(struct pt_regs *regs, struct mcontext __user *sr, int sig) { - long err; unsigned int save_r2 = 0; unsigned long msr; #ifdef CONFIG_VSX int i; #endif - if (!access_ok(sr, sizeof(*sr))) + if (!user_read_access_begin(sr, sizeof(*sr))) return 1; /* * restore general registers but not including MSR or SOFTE. 
Also @@ -500,13 +508,11 @@ static long restore_user_regs(struct pt_regs *regs, */ if (!sig) save_r2 = (unsigned int)regs->gpr[2]; - err = restore_general_regs(regs, sr); + unsafe_restore_general_regs(regs, sr, failed); set_trap_norestart(regs); - err |= __get_user(msr, &sr->mc_gregs[PT_MSR]); + unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed); if (!sig) regs->gpr[2] = (unsigned long) save_r2; - if (err) - return 1; /* if doing signal return, restore the previous little-endian mode */ if (sig) @@ -520,22 +526,19 @@ static long restore_user_regs(struct pt_regs *regs, regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { /* restore altivec registers from the stack */ - if (__copy_from_user(¤t->thread.vr_state, &sr->mc_vregs, - sizeof(sr->mc_vregs))) - return 1; + unsafe_copy_from_user(¤t->thread.vr_state, &sr->mc_vregs, + sizeof(sr->mc_vregs), failed); current->thread.used_vr = true; } else if (current->thread.used_vr) memset(¤t->thread.vr_state, 0, ELF_NVRREG * sizeof(vector128)); /* Always get VRSAVE back */ - if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32])) - return 1; + unsafe_get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32], failed); if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, current->thread.vrsave); #endif /* CONFIG_ALTIVEC */ - if (copy_fpr_from_user(current, &sr->mc_fregs)) - return 1; + unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed); #ifdef CONFIG_VSX /* @@ -548,8 +551,7 @@ static long restore_user_regs(struct pt_regs *regs, * Restore altivec registers from the stack to a local * buffer, then write this out to the thread_struct */ - if (copy_vsx_from_user(current, &sr->mc_vsregs)) - return 1; + unsafe_copy_vsx_from_user(current, &sr->mc_vsregs, failed); current->thread.used_vsr = true; } else if (current->thread.used_vsr) for (i = 0; i < 32 ; i++) @@ -567,19 +569,22 @@ static long restore_user_regs(struct pt_regs *regs, regs->msr &= ~MSR_SPE; if (msr & MSR_SPE) { /* restore spe registers from the stack */ - if (__copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, + ELF_NEVRREG * sizeof(u32)); current->thread.used_spe = true; } else if (current->thread.used_spe) memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); /* Always get SPEFSCR back */ - if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG)) - return 1; + unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ + user_read_access_end(); return 0; + +failed: + user_read_access_end(); + return 1; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -592,7 +597,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *sr, struct mcontext __user *tm_sr) { - long err; unsigned long msr, msr_hi; #ifdef CONFIG_VSX int i; @@ -607,14 +611,13 @@ static long restore_tm_user_regs(struct pt_regs *regs, * TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR * were set by the signal delivery. 
*/ - err = restore_general_regs(¤t->thread.ckpt_regs, sr); - - err |= __get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP]); - - err |= __get_user(msr, &sr->mc_gregs[PT_MSR]); - if (err) + if (!user_read_access_begin(sr, sizeof(*sr))) return 1; + unsafe_restore_general_regs(¤t->thread.ckpt_regs, sr, failed); + unsafe_get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP], failed); + unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed); + /* Restore the previous little-endian mode */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); @@ -622,9 +625,8 @@ static long restore_tm_user_regs(struct pt_regs *regs, regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { /* restore altivec registers from the stack */ - if (__copy_from_user(¤t->thread.ckvr_state, &sr->mc_vregs, - sizeof(sr->mc_vregs))) - return 1; + unsafe_copy_from_user(¤t->thread.ckvr_state, &sr->mc_vregs, + sizeof(sr->mc_vregs), failed); current->thread.used_vr = true; } else if (current->thread.used_vr) { memset(¤t->thread.vr_state, 0, @@ -634,17 +636,15 @@ static long restore_tm_user_regs(struct pt_regs *regs, } /* Always get VRSAVE back */ - if (__get_user(current->thread.ckvrsave, - (u32 __user *)&sr->mc_vregs[32])) - return 1; + unsafe_get_user(current->thread.ckvrsave, + (u32 __user *)&sr->mc_vregs[32], failed); if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, current->thread.ckvrsave); #endif /* CONFIG_ALTIVEC */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); - if (copy_fpr_from_user(current, &sr->mc_fregs)) - return 1; + unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed); #ifdef CONFIG_VSX regs->msr &= ~MSR_VSX; @@ -653,8 +653,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, * Restore altivec registers from the stack to a local * buffer, then write this out to the thread_struct */ - if (copy_ckvsx_from_user(current, &sr->mc_vsregs)) - return 1; + unsafe_copy_ckvsx_from_user(current, &sr->mc_vsregs, failed); current->thread.used_vsr = true; } else if (current->thread.used_vsr) for (i = 0; i < 32 ; i++) { @@ -669,39 +668,36 @@ static long restore_tm_user_regs(struct pt_regs *regs, */ regs->msr &= ~MSR_SPE; if (msr & MSR_SPE) { - if (__copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, + ELF_NEVRREG * sizeof(u32), failed); current->thread.used_spe = true; } else if (current->thread.used_spe) memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); /* Always get SPEFSCR back */ - if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs - + ELF_NEVRREG)) - return 1; + unsafe_get_user(current->thread.spefscr, + (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ - err = restore_general_regs(regs, tm_sr); - if (err) + user_read_access_end(); + + if (!user_read_access_begin(tm_sr, sizeof(*tm_sr))) return 1; + unsafe_restore_general_regs(regs, tm_sr, failed); + #ifdef CONFIG_ALTIVEC /* restore altivec registers from the stack */ if (msr & MSR_VEC) - if (__copy_from_user(¤t->thread.vr_state, - &tm_sr->mc_vregs, - sizeof(sr->mc_vregs))) - return 1; + unsafe_copy_from_user(¤t->thread.vr_state, &tm_sr->mc_vregs, + sizeof(sr->mc_vregs), failed); /* Always get VRSAVE back */ - if (__get_user(current->thread.vrsave, - (u32 __user *)&tm_sr->mc_vregs[32])) - return 1; + unsafe_get_user(current->thread.vrsave, + (u32 __user *)&tm_sr->mc_vregs[32], failed); #endif /* CONFIG_ALTIVEC */ - if (copy_ckfpr_from_user(current, &tm_sr->mc_fregs)) - return 1; + 
unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed); #ifdef CONFIG_VSX if (msr & MSR_VSX) { @@ -709,16 +705,17 @@ static long restore_tm_user_regs(struct pt_regs *regs, * Restore altivec registers from the stack to a local * buffer, then write this out to the thread_struct */ - if (copy_vsx_from_user(current, &tm_sr->mc_vsregs)) - return 1; + unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed); current->thread.used_vsr = true; } #endif /* CONFIG_VSX */ /* Get the top half of the MSR from the user context */ - if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) - return 1; + unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed); msr_hi <<= 32; + + user_read_access_end(); + /* If TM bits are set to the reserved value, it's an invalid context */ if (MSR_TM_RESV(msr_hi)) return 1; @@ -766,6 +763,10 @@ static long restore_tm_user_regs(struct pt_regs *regs, preempt_enable(); return 0; + +failed: + user_read_access_end(); + return 1; } #else static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *sr, From 887f3ceb51cd34109ac17bfc98695162e299e657 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Mar 2021 11:06:58 +0000 Subject: [PATCH 131/302] powerpc/signal32: Convert do_setcontext[_tm]() to user access block Add unsafe_get_user_sigset() and transform PPC32 get_sigset_t() into an unsafe version unsafe_get_sigset_t(). Then convert do_setcontext() and do_setcontext_tm() to use user_read_access_begin/end. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9273ba664db769b8d9c7540ae91395e346e4945e.1616151715.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal.h | 2 ++ arch/powerpc/kernel/signal_32.c | 42 +++++++++++++++++++-------------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index a5152ff3c52f3b..f4aafa337c2edb 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -25,6 +25,8 @@ static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src) return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]); } +#define unsafe_get_user_sigset(dst, src, label) \ + unsafe_get_user((dst)->sig[0], (u64 __user *)&(src)->sig[0], label) #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 0b1a6f53e55394..592b889e38368c 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -83,12 +83,7 @@ * implementation that makes things simple for little endian only) */ #define unsafe_put_sigset_t unsafe_put_compat_sigset - -static inline int get_sigset_t(sigset_t *set, - const compat_sigset_t __user *uset) -{ - return get_compat_sigset(set, uset); -} +#define unsafe_get_sigset_t unsafe_get_compat_sigset #define to_user_ptr(p) ptr_to_compat(p) #define from_user_ptr(p) compat_ptr(p) @@ -144,10 +139,7 @@ __unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr) unsafe_copy_to_user(__us, __s, sizeof(*__us), label); \ } while (0) -static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) -{ - return __get_user_sigset(set, uset); -} +#define unsafe_get_sigset_t unsafe_get_user_sigset #define to_user_ptr(p) ((unsigned long)(p)) #define from_user_ptr(p) ((void __user *)(p)) @@ -982,25 +974,31 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int sigset_t set; struct mcontext __user *mcp; - if 
(get_sigset_t(&set, &ucp->uc_sigmask)) + if (!user_read_access_begin(ucp, sizeof(*ucp))) return -EFAULT; + + unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed); #ifdef CONFIG_PPC64 { u32 cmcp; - if (__get_user(cmcp, &ucp->uc_regs)) - return -EFAULT; + unsafe_get_user(cmcp, &ucp->uc_regs, failed); mcp = (struct mcontext __user *)(u64)cmcp; } #else - if (__get_user(mcp, &ucp->uc_regs)) - return -EFAULT; + unsafe_get_user(mcp, &ucp->uc_regs, failed); #endif + user_read_access_end(); + set_current_blocked(&set); if (restore_user_regs(regs, mcp, sig)) return -EFAULT; return 0; + +failed: + user_read_access_end(); + return -EFAULT; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1014,11 +1012,15 @@ static int do_setcontext_tm(struct ucontext __user *ucp, u32 cmcp; u32 tm_cmcp; - if (get_sigset_t(&set, &ucp->uc_sigmask)) + if (!user_read_access_begin(ucp, sizeof(*ucp))) return -EFAULT; - if (__get_user(cmcp, &ucp->uc_regs) || - __get_user(tm_cmcp, &tm_ucp->uc_regs)) + unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed); + unsafe_get_user(cmcp, &ucp->uc_regs, failed); + + user_read_access_end(); + + if (__get_user(tm_cmcp, &tm_ucp->uc_regs)) return -EFAULT; mcp = (struct mcontext __user *)(u64)cmcp; tm_mcp = (struct mcontext __user *)(u64)tm_cmcp; @@ -1029,6 +1031,10 @@ static int do_setcontext_tm(struct ucontext __user *ucp, return -EFAULT; return 0; + +failed: + user_read_access_end(); + return -EFAULT; } #endif From c7393a71eb1abdda7e3a3ef798bae60de11540ec Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Mar 2021 11:07:00 +0000 Subject: [PATCH 132/302] powerpc/signal32: Simplify logging in sigreturn() Same spirit as commit debf122c777f ("powerpc/signal32: Simplify logging in handle_rt_signal32()"), remove this intermediate 'addr' local var. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/638fa99530beb29f82f94370057d110e91272acc.1616151715.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 592b889e38368c..5be267b3a13e9f 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1352,7 +1352,6 @@ SYSCALL_DEFINE0(sigreturn) struct sigcontext __user *sc; struct sigcontext sigctx; struct mcontext __user *sr; - void __user *addr; sigset_t set; struct mcontext __user *mcp; struct mcontext __user *tm_mcp = NULL; @@ -1363,7 +1362,6 @@ SYSCALL_DEFINE0(sigreturn) sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); sc = &sf->sctx; - addr = sc; if (copy_from_user(&sigctx, sc, sizeof(sigctx))) goto badframe; @@ -1392,16 +1390,19 @@ SYSCALL_DEFINE0(sigreturn) goto badframe; } else { sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); - addr = sr; - if (restore_user_regs(regs, sr, 1)) - goto badframe; + if (restore_user_regs(regs, sr, 1)) { + signal_fault(current, regs, "sys_sigreturn", sr); + + force_sig(SIGSEGV); + return 0; + } } set_thread_flag(TIF_RESTOREALL); return 0; badframe: - signal_fault(current, regs, "sys_sigreturn", addr); + signal_fault(current, regs, "sys_sigreturn", sc); force_sig(SIGSEGV); return 0; From 6944caad78fc4de4ecd0364bbc9715b62b020965 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 22 Mar 2021 16:37:46 +0000 Subject: [PATCH 133/302] powerpc/bpf: Remove classical BPF support for PPC32 At the time being, PPC32 has Classical BPF support. 
The test_bpf module exhibits some failure: test_bpf: #298 LD_IND byte frag jited:1 ret 202 != 66 FAIL (1 times) test_bpf: #299 LD_IND halfword frag jited:1 ret 51958 != 17220 FAIL (1 times) test_bpf: #301 LD_IND halfword mixed head/frag jited:1 ret 51958 != 1305 FAIL (1 times) test_bpf: #303 LD_ABS byte frag jited:1 ret 202 != 66 FAIL (1 times) test_bpf: #304 LD_ABS halfword frag jited:1 ret 51958 != 17220 FAIL (1 times) test_bpf: #306 LD_ABS halfword mixed head/frag jited:1 ret 51958 != 1305 FAIL (1 times) test_bpf: Summary: 371 PASSED, 7 FAILED, [119/366 JIT'ed] Fixing this is not worth the effort. Instead, remove support for classical BPF and prepare for adding Extended BPF support instead. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/fbc3e4fcc9c8f6131d6c705212530b2aa50149ee.1616430991.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 1 - arch/powerpc/net/Makefile | 4 - arch/powerpc/net/bpf_jit32.h | 139 ------- arch/powerpc/net/bpf_jit_asm.S | 226 ----------- arch/powerpc/net/bpf_jit_comp.c | 683 -------------------------------- 5 files changed, 1053 deletions(-) delete mode 100644 arch/powerpc/net/bpf_jit32.h delete mode 100644 arch/powerpc/net/bpf_jit_asm.S delete mode 100644 arch/powerpc/net/bpf_jit_comp.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6c400f877d8942..29217437b8acf6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -195,7 +195,6 @@ config PPC select HAVE_ARCH_TRACEHOOK select HAVE_ASM_MODVERSIONS select HAVE_C_RECORDMCOUNT - select HAVE_CBPF_JIT if !PPC64 select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) select HAVE_CONTEXT_TRACKING if PPC64 diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index c2dec3a68d4c42..52c939cef5b2ab 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile @@ -2,8 +2,4 @@ # # Arch-specific network modules # -ifdef CONFIG_PPC64 obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o -else -obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o -endif diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h deleted file mode 100644 index 448dfd4d98e184..00000000000000 --- a/arch/powerpc/net/bpf_jit32.h +++ /dev/null @@ -1,139 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * bpf_jit32.h: BPF JIT compiler for PPC - * - * Copyright 2011 Matt Evans , IBM Corporation - * - * Split from bpf_jit.h - */ -#ifndef _BPF_JIT32_H -#define _BPF_JIT32_H - -#include -#include "bpf_jit.h" - -#ifdef CONFIG_PPC64 -#define BPF_PPC_STACK_R3_OFF 48 -#define BPF_PPC_STACK_LOCALS 32 -#define BPF_PPC_STACK_BASIC (48+64) -#define BPF_PPC_STACK_SAVE (18*8) -#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \ - BPF_PPC_STACK_SAVE) -#define BPF_PPC_SLOWPATH_FRAME (48+64) -#else -#define BPF_PPC_STACK_R3_OFF 24 -#define BPF_PPC_STACK_LOCALS 16 -#define BPF_PPC_STACK_BASIC (24+32) -#define BPF_PPC_STACK_SAVE (18*4) -#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \ - BPF_PPC_STACK_SAVE) -#define BPF_PPC_SLOWPATH_FRAME (24+32) -#endif - -#define REG_SZ (BITS_PER_LONG/8) - -/* - * Generated code register usage: - * - * As normal PPC C ABI (e.g. 
r1=sp, r2=TOC), with: - * - * skb r3 (Entry parameter) - * A register r4 - * X register r5 - * addr param r6 - * r7-r10 scratch - * skb->data r14 - * skb headlen r15 (skb->len - skb->data_len) - * m[0] r16 - * m[...] ... - * m[15] r31 - */ -#define r_skb 3 -#define r_ret 3 -#define r_A 4 -#define r_X 5 -#define r_addr 6 -#define r_scratch1 7 -#define r_scratch2 8 -#define r_D 14 -#define r_HL 15 -#define r_M 16 - -#ifndef __ASSEMBLY__ - -/* - * Assembly helpers from arch/powerpc/net/bpf_jit.S: - */ -#define DECLARE_LOAD_FUNC(func) \ - extern u8 func[], func##_negative_offset[], func##_positive_offset[] - -DECLARE_LOAD_FUNC(sk_load_word); -DECLARE_LOAD_FUNC(sk_load_half); -DECLARE_LOAD_FUNC(sk_load_byte); -DECLARE_LOAD_FUNC(sk_load_byte_msh); - -#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LBZ(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LBZ(r, r, IMM_L(i))); } } while(0) - -#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LD(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LD(r, r, IMM_L(i))); } } while(0) - -#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LWZ(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LWZ(r, r, IMM_L(i))); } } while(0) - -#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LHZ(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LHZ(r, r, IMM_L(i))); } } while(0) - -#ifdef CONFIG_PPC64 -#define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0) -#else -#define PPC_LL_OFFS(r, base, i) do { PPC_LWZ_OFFS(r, base, i); } while(0) -#endif - -#ifdef CONFIG_SMP -#ifdef CONFIG_PPC64 -#define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(sizeof_field(struct paca_struct, paca_index) != 2); \ - PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index)); \ - } while (0) -#else -#define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(sizeof_field(struct task_struct, cpu) != 4); \ - PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu)); \ - } while(0) -#endif -#else -#define PPC_BPF_LOAD_CPU(r) do { EMIT(PPC_RAW_LI(r, 0)); } while(0) -#endif - -#define PPC_LHBRX_OFFS(r, base, i) \ - do { PPC_LI32(r, i); EMIT(PPC_RAW_LHBRX(r, r, base)); } while(0) -#ifdef __LITTLE_ENDIAN__ -#define PPC_NTOHS_OFFS(r, base, i) PPC_LHBRX_OFFS(r, base, i) -#else -#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i) -#endif - -#define PPC_BPF_LL(r, base, i) do { EMIT(PPC_RAW_LWZ(r, base, i)); } while(0) -#define PPC_BPF_STL(r, base, i) do { EMIT(PPC_RAW_STW(r, base, i)); } while(0) -#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STWU(r, base, i)); } while(0) - -#define SEEN_DATAREF 0x10000 /* might call external helpers */ -#define SEEN_XREG 0x20000 /* X reg is used */ -#define SEEN_MEM 0x40000 /* SEEN_MEM+(1<, IBM Corporation - */ - -#include -#include -#include "bpf_jit32.h" - -/* - * All of these routines are called directly from generated code, - * whose register usage is: - * - * r3 skb - * r4,r5 A,X - * r6 *** address parameter to helper *** - * r7-r10 scratch - * r14 skb->data - * r15 skb headlen - * r16-31 M[] - */ - -/* - * To consider: These helpers are so small it could be better to just - * generate them inline. Inline code can do the simple headlen check - * then branch directly to slow_path_XXX if required. (In fact, could - * load a spare GPR with the address of slow_path_generic and pass size - * as an argument, making the call site a mtlr, li and bllr.) 
- */ - .globl sk_load_word -sk_load_word: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_word_neg - .globl sk_load_word_positive_offset -sk_load_word_positive_offset: - /* Are we accessing past headlen? */ - subi r_scratch1, r_HL, 4 - PPC_LCMP r_scratch1, r_addr - blt bpf_slow_path_word - /* Nope, just hitting the header. cr0 here is eq or gt! */ -#ifdef __LITTLE_ENDIAN__ - lwbrx r_A, r_D, r_addr -#else - lwzx r_A, r_D, r_addr -#endif - blr /* Return success, cr0 != LT */ - - .globl sk_load_half -sk_load_half: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_half_neg - .globl sk_load_half_positive_offset -sk_load_half_positive_offset: - subi r_scratch1, r_HL, 2 - PPC_LCMP r_scratch1, r_addr - blt bpf_slow_path_half -#ifdef __LITTLE_ENDIAN__ - lhbrx r_A, r_D, r_addr -#else - lhzx r_A, r_D, r_addr -#endif - blr - - .globl sk_load_byte -sk_load_byte: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_byte_neg - .globl sk_load_byte_positive_offset -sk_load_byte_positive_offset: - PPC_LCMP r_HL, r_addr - ble bpf_slow_path_byte - lbzx r_A, r_D, r_addr - blr - -/* - * BPF_LDX | BPF_B | BPF_MSH: ldxb 4*([offset]&0xf) - * r_addr is the offset value - */ - .globl sk_load_byte_msh -sk_load_byte_msh: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_byte_msh_neg - .globl sk_load_byte_msh_positive_offset -sk_load_byte_msh_positive_offset: - PPC_LCMP r_HL, r_addr - ble bpf_slow_path_byte_msh - lbzx r_X, r_D, r_addr - rlwinm r_X, r_X, 2, 32-4-2, 31-2 - blr - -/* Call out to skb_copy_bits: - * We'll need to back up our volatile regs first; we have - * local variable space at r1+(BPF_PPC_STACK_BASIC). - * Allocate a new stack frame here to remain ABI-compliant in - * stashing LR. - */ -#define bpf_slow_path_common(SIZE) \ - mflr r0; \ - PPC_STL r0, PPC_LR_STKOFF(r1); \ - /* R3 goes in parameter space of caller's frame */ \ - PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - addi r5, r1, BPF_PPC_STACK_BASIC+(2*REG_SZ); \ - PPC_STLU r1, -BPF_PPC_SLOWPATH_FRAME(r1); \ - /* R3 = r_skb, as passed */ \ - mr r4, r_addr; \ - li r6, SIZE; \ - bl skb_copy_bits; \ - nop; \ - /* R3 = 0 on success */ \ - addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \ - PPC_LL r0, PPC_LR_STKOFF(r1); \ - PPC_LL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_LL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - mtlr r0; \ - PPC_LCMPI r3, 0; \ - blt bpf_error; /* cr0 = LT */ \ - PPC_LL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - /* Great success! */ - -bpf_slow_path_word: - bpf_slow_path_common(4) - /* Data value is on stack, and cr0 != LT */ - lwz r_A, BPF_PPC_STACK_BASIC+(2*REG_SZ)(r1) - blr - -bpf_slow_path_half: - bpf_slow_path_common(2) - lhz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1) - blr - -bpf_slow_path_byte: - bpf_slow_path_common(1) - lbz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1) - blr - -bpf_slow_path_byte_msh: - bpf_slow_path_common(1) - lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1) - rlwinm r_X, r_X, 2, 32-4-2, 31-2 - blr - -/* Call out to bpf_internal_load_pointer_neg_helper: - * We'll need to back up our volatile regs first; we have - * local variable space at r1+(BPF_PPC_STACK_BASIC). - * Allocate a new stack frame here to remain ABI-compliant in - * stashing LR. 
- */ -#define sk_negative_common(SIZE) \ - mflr r0; \ - PPC_STL r0, PPC_LR_STKOFF(r1); \ - /* R3 goes in parameter space of caller's frame */ \ - PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - PPC_STLU r1, -BPF_PPC_SLOWPATH_FRAME(r1); \ - /* R3 = r_skb, as passed */ \ - mr r4, r_addr; \ - li r5, SIZE; \ - bl bpf_internal_load_pointer_neg_helper; \ - nop; \ - /* R3 != 0 on success */ \ - addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \ - PPC_LL r0, PPC_LR_STKOFF(r1); \ - PPC_LL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_LL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - mtlr r0; \ - PPC_LCMPLI r3, 0; \ - beq bpf_error_slow; /* cr0 = EQ */ \ - mr r_addr, r3; \ - PPC_LL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - /* Great success! */ - -bpf_slow_path_word_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_word_negative_offset -sk_load_word_negative_offset: - sk_negative_common(4) - lwz r_A, 0(r_addr) - blr - -bpf_slow_path_half_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_half_negative_offset -sk_load_half_negative_offset: - sk_negative_common(2) - lhz r_A, 0(r_addr) - blr - -bpf_slow_path_byte_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_byte_negative_offset -sk_load_byte_negative_offset: - sk_negative_common(1) - lbz r_A, 0(r_addr) - blr - -bpf_slow_path_byte_msh_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_byte_msh_negative_offset -sk_load_byte_msh_negative_offset: - sk_negative_common(1) - lbz r_X, 0(r_addr) - rlwinm r_X, r_X, 2, 32-4-2, 31-2 - blr - -bpf_error_slow: - /* fabricate a cr0 = lt */ - li r_scratch1, -1 - PPC_LCMPI r_scratch1, 0 -bpf_error: - /* Entered with cr0 = lt */ - li r3, 0 - /* Generated code will 'blt epilogue', returning 0. */ - blr diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c deleted file mode 100644 index e809cb5a16316b..00000000000000 --- a/arch/powerpc/net/bpf_jit_comp.c +++ /dev/null @@ -1,683 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* bpf_jit_comp.c: BPF JIT compiler - * - * Copyright 2011 Matt Evans , IBM Corporation - * - * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com) - * Ported to ppc32 by Denis Kirjanov - */ -#include -#include -#include -#include -#include -#include - -#include "bpf_jit32.h" - -static inline void bpf_flush_icache(void *start, void *end) -{ - smp_wmb(); - flush_icache_range((unsigned long)start, (unsigned long)end); -} - -static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx) -{ - int i; - const struct sock_filter *filter = fp->insns; - - if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { - /* Make stackframe */ - if (ctx->seen & SEEN_DATAREF) { - /* If we call any helpers (for loads), save LR */ - EMIT(PPC_INST_MFLR | __PPC_RT(R0)); - PPC_BPF_STL(0, 1, PPC_LR_STKOFF); - - /* Back up non-volatile regs. */ - PPC_BPF_STL(r_D, 1, -(REG_SZ*(32-r_D))); - PPC_BPF_STL(r_HL, 1, -(REG_SZ*(32-r_HL))); - } - if (ctx->seen & SEEN_MEM) { - /* - * Conditionally save regs r15-r31 as some will be used - * for M[] data. 
- */ - for (i = r_M; i < (r_M+16); i++) { - if (ctx->seen & (1 << (i-r_M))) - PPC_BPF_STL(i, 1, -(REG_SZ*(32-i))); - } - } - PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME); - } - - if (ctx->seen & SEEN_DATAREF) { - /* - * If this filter needs to access skb data, - * prepare r_D and r_HL: - * r_HL = skb->len - skb->data_len - * r_D = skb->data - */ - PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, - data_len)); - PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len)); - EMIT(PPC_RAW_SUB(r_HL, r_HL, r_scratch1)); - PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data)); - } - - if (ctx->seen & SEEN_XREG) { - /* - * TODO: Could also detect whether first instr. sets X and - * avoid this (as below, with A). - */ - EMIT(PPC_RAW_LI(r_X, 0)); - } - - /* make sure we dont leak kernel information to user */ - if (bpf_needs_clear_a(&filter[0])) - EMIT(PPC_RAW_LI(r_A, 0)); -} - -static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) -{ - int i; - - if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { - EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME)); - if (ctx->seen & SEEN_DATAREF) { - PPC_BPF_LL(0, 1, PPC_LR_STKOFF); - EMIT(PPC_RAW_MTLR(0)); - PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D))); - PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL))); - } - if (ctx->seen & SEEN_MEM) { - /* Restore any saved non-vol registers */ - for (i = r_M; i < (r_M+16); i++) { - if (ctx->seen & (1 << (i-r_M))) - PPC_BPF_LL(i, 1, -(REG_SZ*(32-i))); - } - } - } - /* The RETs have left a return value in R3. */ - - EMIT(PPC_RAW_BLR()); -} - -#define CHOOSE_LOAD_FUNC(K, func) \ - ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) - -/* Assemble the body code between the prologue & epilogue. */ -static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, - unsigned int *addrs) -{ - const struct sock_filter *filter = fp->insns; - int flen = fp->len; - u8 *func; - unsigned int true_cond; - int i; - - /* Start of epilogue code */ - unsigned int exit_addr = addrs[flen]; - - for (i = 0; i < flen; i++) { - unsigned int K = filter[i].k; - u16 code = bpf_anc_helper(&filter[i]); - - /* - * addrs[] maps a BPF bytecode address into a real offset from - * the start of the body code. 
- */ - addrs[i] = ctx->idx * 4; - - switch (code) { - /*** ALU ops ***/ - case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_ADD(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ - if (!K) - break; - EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(K))); - if (K >= 32768) - EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(K))); - break; - case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_SUB(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ - if (!K) - break; - EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(-K))); - if (K >= 32768) - EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(-K))); - break; - case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_MULW(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ - if (K < 32768) - EMIT(PPC_RAW_MULI(r_A, r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_MULW(r_A, r_A, r_scratch1)); - } - break; - case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */ - case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_CMPWI(r_X, 0)); - if (ctx->pc_ret0 != -1) { - PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); - } else { - PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); - EMIT(PPC_RAW_LI(r_ret, 0)); - PPC_JMP(exit_addr); - } - if (code == (BPF_ALU | BPF_MOD | BPF_X)) { - EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_X)); - EMIT(PPC_RAW_MULW(r_scratch1, r_X, r_scratch1)); - EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); - } else { - EMIT(PPC_RAW_DIVWU(r_A, r_A, r_X)); - } - break; - case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */ - PPC_LI32(r_scratch2, K); - EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_scratch2)); - EMIT(PPC_RAW_MULW(r_scratch1, r_scratch2, r_scratch1)); - EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); - break; - case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ - if (K == 1) - break; - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_DIVWU(r_A, r_A, r_scratch1)); - break; - case BPF_ALU | BPF_AND | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_AND(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_AND | BPF_K: - if (!IMM_H(K)) - EMIT(PPC_RAW_ANDI(r_A, r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_AND(r_A, r_A, r_scratch1)); - } - break; - case BPF_ALU | BPF_OR | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_OR(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_OR | BPF_K: - if (IMM_L(K)) - EMIT(PPC_RAW_ORI(r_A, r_A, IMM_L(K))); - if (K >= 65536) - EMIT(PPC_RAW_ORIS(r_A, r_A, IMM_H(K))); - break; - case BPF_ANC | SKF_AD_ALU_XOR_X: - case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_XOR(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ - if (IMM_L(K)) - EMIT(PPC_RAW_XORI(r_A, r_A, IMM_L(K))); - if (K >= 65536) - EMIT(PPC_RAW_XORIS(r_A, r_A, IMM_H(K))); - break; - case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_SLW(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_LSH | BPF_K: - if (K == 0) - break; - else - EMIT(PPC_RAW_SLWI(r_A, r_A, K)); - break; - case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_SRW(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ - if (K == 0) - break; - else - EMIT(PPC_RAW_SRWI(r_A, r_A, K)); - break; - case BPF_ALU | BPF_NEG: - EMIT(PPC_RAW_NEG(r_A, r_A)); - break; - case BPF_RET | BPF_K: - PPC_LI32(r_ret, K); - if (!K) { - if (ctx->pc_ret0 == -1) - ctx->pc_ret0 = i; - } - /* - * If this isn't the very last instruction, branch to - * the epilogue if we've 
stuff to clean up. Otherwise, - * if there's nothing to tidy, just return. If we /are/ - * the last instruction, we're about to fall through to - * the epilogue to return. - */ - if (i != flen - 1) { - /* - * Note: 'seen' is properly valid only on pass - * #2. Both parts of this conditional are the - * same instruction size though, meaning the - * first pass will still correctly determine the - * code size/addresses. - */ - if (ctx->seen) - PPC_JMP(exit_addr); - else - EMIT(PPC_RAW_BLR()); - } - break; - case BPF_RET | BPF_A: - EMIT(PPC_RAW_MR(r_ret, r_A)); - if (i != flen - 1) { - if (ctx->seen) - PPC_JMP(exit_addr); - else - EMIT(PPC_RAW_BLR()); - } - break; - case BPF_MISC | BPF_TAX: /* X = A */ - EMIT(PPC_RAW_MR(r_X, r_A)); - break; - case BPF_MISC | BPF_TXA: /* A = X */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_MR(r_A, r_X)); - break; - - /*** Constant loads/M[] access ***/ - case BPF_LD | BPF_IMM: /* A = K */ - PPC_LI32(r_A, K); - break; - case BPF_LDX | BPF_IMM: /* X = K */ - PPC_LI32(r_X, K); - break; - case BPF_LD | BPF_MEM: /* A = mem[K] */ - EMIT(PPC_RAW_MR(r_A, r_M + (K & 0xf))); - ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_LDX | BPF_MEM: /* X = mem[K] */ - EMIT(PPC_RAW_MR(r_X, r_M + (K & 0xf))); - ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_ST: /* mem[K] = A */ - EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_A)); - ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_STX: /* mem[K] = X */ - EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_X)); - ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ - BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4); - PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); - break; - case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */ - PPC_LWZ_OFFS(r_A, r_skb, K); - break; - case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ - PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len)); - break; - - /*** Ancillary info loads ***/ - case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ - BUILD_BUG_ON(sizeof_field(struct sk_buff, - protocol) != 2); - PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff, - protocol)); - break; - case BPF_ANC | SKF_AD_IFINDEX: - case BPF_ANC | SKF_AD_HATYPE: - BUILD_BUG_ON(sizeof_field(struct net_device, - ifindex) != 4); - BUILD_BUG_ON(sizeof_field(struct net_device, - type) != 2); - PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, - dev)); - EMIT(PPC_RAW_CMPDI(r_scratch1, 0)); - if (ctx->pc_ret0 != -1) { - PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); - } else { - /* Exit, returning 0; first pass hits here. 
*/ - PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12); - EMIT(PPC_RAW_LI(r_ret, 0)); - PPC_JMP(exit_addr); - } - if (code == (BPF_ANC | SKF_AD_IFINDEX)) { - PPC_LWZ_OFFS(r_A, r_scratch1, - offsetof(struct net_device, ifindex)); - } else { - PPC_LHZ_OFFS(r_A, r_scratch1, - offsetof(struct net_device, type)); - } - - break; - case BPF_ANC | SKF_AD_MARK: - BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4); - PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - mark)); - break; - case BPF_ANC | SKF_AD_RXHASH: - BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4); - PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - hash)); - break; - case BPF_ANC | SKF_AD_VLAN_TAG: - BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2); - - PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - vlan_tci)); - break; - case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: - PPC_LBZ_OFFS(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET()); - if (PKT_VLAN_PRESENT_BIT) - EMIT(PPC_RAW_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT)); - if (PKT_VLAN_PRESENT_BIT < 7) - EMIT(PPC_RAW_ANDI(r_A, r_A, 1)); - break; - case BPF_ANC | SKF_AD_QUEUE: - BUILD_BUG_ON(sizeof_field(struct sk_buff, - queue_mapping) != 2); - PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - queue_mapping)); - break; - case BPF_ANC | SKF_AD_PKTTYPE: - PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET()); - EMIT(PPC_RAW_ANDI(r_A, r_A, PKT_TYPE_MAX)); - EMIT(PPC_RAW_SRWI(r_A, r_A, 5)); - break; - case BPF_ANC | SKF_AD_CPU: - PPC_BPF_LOAD_CPU(r_A); - break; - /*** Absolute loads from packet header/data ***/ - case BPF_LD | BPF_W | BPF_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_word); - goto common_load; - case BPF_LD | BPF_H | BPF_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_half); - goto common_load; - case BPF_LD | BPF_B | BPF_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte); - common_load: - /* Load from [K]. */ - ctx->seen |= SEEN_DATAREF; - PPC_FUNC_ADDR(r_scratch1, func); - EMIT(PPC_RAW_MTLR(r_scratch1)); - PPC_LI32(r_addr, K); - EMIT(PPC_RAW_BLRL()); - /* - * Helper returns 'lt' condition on error, and an - * appropriate return value in r3 - */ - PPC_BCC(COND_LT, exit_addr); - break; - - /*** Indirect loads from packet header/data ***/ - case BPF_LD | BPF_W | BPF_IND: - func = sk_load_word; - goto common_load_ind; - case BPF_LD | BPF_H | BPF_IND: - func = sk_load_half; - goto common_load_ind; - case BPF_LD | BPF_B | BPF_IND: - func = sk_load_byte; - common_load_ind: - /* - * Load from [X + K]. Negative offsets are tested for - * in the helper functions. 
- */ - ctx->seen |= SEEN_DATAREF | SEEN_XREG; - PPC_FUNC_ADDR(r_scratch1, func); - EMIT(PPC_RAW_MTLR(r_scratch1)); - EMIT(PPC_RAW_ADDI(r_addr, r_X, IMM_L(K))); - if (K >= 32768) - EMIT(PPC_RAW_ADDIS(r_addr, r_addr, IMM_HA(K))); - EMIT(PPC_RAW_BLRL()); - /* If error, cr0.LT set */ - PPC_BCC(COND_LT, exit_addr); - break; - - case BPF_LDX | BPF_B | BPF_MSH: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); - goto common_load; - break; - - /*** Jump and branches ***/ - case BPF_JMP | BPF_JA: - if (K != 0) - PPC_JMP(addrs[i + 1 + K]); - break; - - case BPF_JMP | BPF_JGT | BPF_K: - case BPF_JMP | BPF_JGT | BPF_X: - true_cond = COND_GT; - goto cond_branch; - case BPF_JMP | BPF_JGE | BPF_K: - case BPF_JMP | BPF_JGE | BPF_X: - true_cond = COND_GE; - goto cond_branch; - case BPF_JMP | BPF_JEQ | BPF_K: - case BPF_JMP | BPF_JEQ | BPF_X: - true_cond = COND_EQ; - goto cond_branch; - case BPF_JMP | BPF_JSET | BPF_K: - case BPF_JMP | BPF_JSET | BPF_X: - true_cond = COND_NE; - cond_branch: - /* same targets, can avoid doing the test :) */ - if (filter[i].jt == filter[i].jf) { - if (filter[i].jt > 0) - PPC_JMP(addrs[i + 1 + filter[i].jt]); - break; - } - - switch (code) { - case BPF_JMP | BPF_JGT | BPF_X: - case BPF_JMP | BPF_JGE | BPF_X: - case BPF_JMP | BPF_JEQ | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_CMPLW(r_A, r_X)); - break; - case BPF_JMP | BPF_JSET | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, r_X)); - break; - case BPF_JMP | BPF_JEQ | BPF_K: - case BPF_JMP | BPF_JGT | BPF_K: - case BPF_JMP | BPF_JGE | BPF_K: - if (K < 32768) - EMIT(PPC_RAW_CMPLWI(r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_CMPLW(r_A, r_scratch1)); - } - break; - case BPF_JMP | BPF_JSET | BPF_K: - if (K < 32768) - /* PPC_ANDI is /only/ dot-form */ - EMIT(PPC_RAW_ANDI(r_scratch1, r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, - r_scratch1)); - } - break; - } - /* Sometimes branches are constructed "backward", with - * the false path being the branch and true path being - * a fallthrough to the next instruction. - */ - if (filter[i].jt == 0) - /* Swap the sense of the branch */ - PPC_BCC(true_cond ^ COND_CMP_TRUE, - addrs[i + 1 + filter[i].jf]); - else { - PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]); - if (filter[i].jf != 0) - PPC_JMP(addrs[i + 1 + filter[i].jf]); - } - break; - default: - /* The filter contains something cruel & unusual. - * We don't handle it, but also there shouldn't be - * anything missing from our list. - */ - if (printk_ratelimit()) - pr_err("BPF filter opcode %04x (@%d) unsupported\n", - filter[i].code, i); - return -ENOTSUPP; - } - - } - /* Set end-of-body-code address for exit. */ - addrs[i] = ctx->idx * 4; - - return 0; -} - -void bpf_jit_compile(struct bpf_prog *fp) -{ - unsigned int proglen; - unsigned int alloclen; - u32 *image = NULL; - u32 *code_base; - unsigned int *addrs; - struct codegen_context cgctx; - int pass; - int flen = fp->len; - - if (!bpf_jit_enable) - return; - - addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL); - if (addrs == NULL) - return; - - /* - * There are multiple assembly passes as the generated code will change - * size as it settles down, figuring out the max branch offsets/exit - * paths required. - * - * The range of standard conditional branches is +/- 32Kbytes. Since - * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to - * finish with 8 bytes/instruction. Not feasible, so long jumps are - * used, distinct from short branches. 
- * - * Current: - * - * For now, both branch types assemble to 2 words (short branches padded - * with a NOP); this is less efficient, but assembly will always complete - * after exactly 3 passes: - * - * First pass: No code buffer; Program is "faux-generated" -- no code - * emitted but maximum size of output determined (and addrs[] filled - * in). Also, we note whether we use M[], whether we use skb data, etc. - * All generation choices assumed to be 'worst-case', e.g. branches all - * far (2 instructions), return path code reduction not available, etc. - * - * Second pass: Code buffer allocated with size determined previously. - * Prologue generated to support features we have seen used. Exit paths - * determined and addrs[] is filled in again, as code may be slightly - * smaller as a result. - * - * Third pass: Code generated 'for real', and branch destinations - * determined from now-accurate addrs[] map. - * - * Ideal: - * - * If we optimise this, near branches will be shorter. On the - * first assembly pass, we should err on the side of caution and - * generate the biggest code. On subsequent passes, branches will be - * generated short or long and code size will reduce. With smaller - * code, more branches may fall into the short category, and code will - * reduce more. - * - * Finally, if we see one pass generate code the same size as the - * previous pass we have converged and should now generate code for - * real. Allocating at the end will also save the memory that would - * otherwise be wasted by the (small) current code shrinkage. - * Preferably, we should do a small number of passes (e.g. 5) and if we - * haven't converged by then, get impatient and force code to generate - * as-is, even if the odd branch would be left long. The chances of a - * long jump are tiny with all but the most enormous of BPF filter - * inputs, so we should usually converge on the third pass. - */ - - cgctx.idx = 0; - cgctx.seen = 0; - cgctx.pc_ret0 = -1; - /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) - /* We hit something illegal or unsupported. */ - goto out; - - /* - * Pretend to build prologue, given the features we've seen. This will - * update ctgtx.idx as it pretends to output instructions, then we can - * calculate total size from idx. - */ - bpf_jit_build_prologue(fp, 0, &cgctx); - bpf_jit_build_epilogue(0, &cgctx); - - proglen = cgctx.idx * 4; - alloclen = proglen + FUNCTION_DESCR_SIZE; - image = module_alloc(alloclen); - if (!image) - goto out; - - code_base = image + (FUNCTION_DESCR_SIZE/4); - - /* Code generation passes 1-2 */ - for (pass = 1; pass < 3; pass++) { - /* Now build the prologue, body code & epilogue for real. */ - cgctx.idx = 0; - bpf_jit_build_prologue(fp, code_base, &cgctx); - bpf_jit_build_body(fp, code_base, &cgctx, addrs); - bpf_jit_build_epilogue(code_base, &cgctx); - - if (bpf_jit_enable > 1) - pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass, - proglen - (cgctx.idx * 4), cgctx.seen); - } - - if (bpf_jit_enable > 1) - /* Note that we output the base address of the code_base - * rather than image, since opcodes are in code_base. 
- */ - bpf_jit_dump(flen, proglen, pass, code_base); - - bpf_flush_icache(code_base, code_base + (proglen/4)); - -#ifdef CONFIG_PPC64 - /* Function descriptor nastiness: Address + TOC */ - ((u64 *)image)[0] = (u64)code_base; - ((u64 *)image)[1] = local_paca->kernel_toc; -#endif - - fp->bpf_func = (void *)image; - fp->jited = 1; - -out: - kfree(addrs); - return; -} - -void bpf_jit_free(struct bpf_prog *fp) -{ - if (fp->jited) - module_memfree(fp->bpf_func); - - bpf_prog_unlock_free(fp); -} From ed573b57e77a7860fe4026e1700faa2f6938caf1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 22 Mar 2021 16:37:47 +0000 Subject: [PATCH 134/302] powerpc/bpf: Change register numbering for bpf_set/is_seen_register() Instead of using BPF register number as input in functions bpf_set_seen_register() and bpf_is_seen_register(), use CPU register number directly. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0cd2506f598e7095ea43e62dca1f472de5474a0d.1616430991.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit_comp64.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index aaf1a887f653b8..51b3f440288c43 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -31,12 +31,12 @@ static inline void bpf_flush_icache(void *start, void *end) static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) { - return (ctx->seen & (1 << (31 - b2p[i]))); + return ctx->seen & (1 << (31 - i)); } static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) { - ctx->seen |= (1 << (31 - b2p[i])); + ctx->seen |= 1 << (31 - i); } static inline bool bpf_has_stack_frame(struct codegen_context *ctx) @@ -47,7 +47,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * - the bpf program uses its stack area * The latter condition is deduced from the usage of BPF_REG_FP */ - return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP); + return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]); } /* @@ -124,11 +124,11 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * in the protected zone below the previous stack frame */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, i)) + if (bpf_is_seen_register(ctx, b2p[i])) PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, BPF_REG_FP)) + if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP])) EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -139,7 +139,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, i)) + if (bpf_is_seen_register(ctx, b2p[i])) PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); /* Tear down our stack frame */ @@ -330,9 +330,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, * any issues. 
*/ if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32) - bpf_set_seen_register(ctx, insn[i].dst_reg); + bpf_set_seen_register(ctx, dst_reg); if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32) - bpf_set_seen_register(ctx, insn[i].src_reg); + bpf_set_seen_register(ctx, src_reg); switch (code) { /* From f1b1583d5faa86cb3dcb7b740594868debad7c30 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 22 Mar 2021 16:37:48 +0000 Subject: [PATCH 135/302] powerpc/bpf: Move common helpers into bpf_jit.h Move functions bpf_flush_icache(), bpf_is_seen_register() and bpf_set_seen_register() in order to reuse them in future bpf_jit_comp32.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/28e8d5a75e64807d7e9d39a4b52658755e259f8c.1616430991.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit.h | 35 +++++++++++++++++++++++++++++++ arch/powerpc/net/bpf_jit64.h | 19 ----------------- arch/powerpc/net/bpf_jit_comp64.c | 16 -------------- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index d0a67a1bbaf188..b8fa6908fc5e7e 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -108,6 +108,41 @@ static inline bool is_nearbranch(int offset) #define COND_LT (CR0_LT | COND_CMP_TRUE) #define COND_LE (CR0_GT | COND_CMP_FALSE) +#define SEEN_FUNC 0x1000 /* might call external helpers */ +#define SEEN_STACK 0x2000 /* uses BPF stack */ +#define SEEN_TAILCALL 0x4000 /* uses tail calls */ + +struct codegen_context { + /* + * This is used to track register usage as well + * as calls to external helpers. + * - register usage is tracked with corresponding + * bits (r3-r10 and r27-r31) + * - rest of the bits can be used to track other + * things -- for now, we use bits 16 to 23 + * encoded in SEEN_* macros above + */ + unsigned int seen; + unsigned int idx; + unsigned int stack_size; +}; + +static inline void bpf_flush_icache(void *start, void *end) +{ + smp_wmb(); /* smp write barrier */ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) +{ + return ctx->seen & (1 << (31 - i)); +} + +static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) +{ + ctx->seen |= 1 << (31 - i); +} + #endif #endif diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index 2e33c6673ff957..b05f2e67bba140 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -86,25 +86,6 @@ static const int b2p[] = { } while(0) #define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0) -#define SEEN_FUNC 0x1000 /* might call external helpers */ -#define SEEN_STACK 0x2000 /* uses BPF stack */ -#define SEEN_TAILCALL 0x4000 /* uses tail calls */ - -struct codegen_context { - /* - * This is used to track register usage as well - * as calls to external helpers. 
- * - register usage is tracked with corresponding - * bits (r3-r10 and r27-r31) - * - rest of the bits can be used to track other - * things -- for now, we use bits 16 to 23 - * encoded in SEEN_* macros above - */ - unsigned int seen; - unsigned int idx; - unsigned int stack_size; -}; - #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 51b3f440288c43..111451bc5cc099 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -23,22 +23,6 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size) memset32(area, BREAKPOINT_INSTRUCTION, size/4); } -static inline void bpf_flush_icache(void *start, void *end) -{ - smp_wmb(); - flush_icache_range((unsigned long)start, (unsigned long)end); -} - -static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) -{ - return ctx->seen & (1 << (31 - i)); -} - -static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) -{ - ctx->seen |= 1 << (31 - i); -} - static inline bool bpf_has_stack_frame(struct codegen_context *ctx) { /* From 4ea76e90a97d22f86adbb10044d29d919e620f2e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 22 Mar 2021 16:37:49 +0000 Subject: [PATCH 136/302] powerpc/bpf: Move common functions into bpf_jit_comp.c Move into bpf_jit_comp.c the functions that will remain common to PPC64 and PPC32 when we add support of EBPF for PPC32. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2c339d77fb168ef12b213ccddfee3cb6c8ce8ae1.1616430991.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/Makefile | 2 +- arch/powerpc/net/bpf_jit.h | 6 + arch/powerpc/net/bpf_jit_comp.c | 269 ++++++++++++++++++++++++++++++ arch/powerpc/net/bpf_jit_comp64.c | 263 +---------------------------- 4 files changed, 281 insertions(+), 259 deletions(-) create mode 100644 arch/powerpc/net/bpf_jit_comp.c diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index 52c939cef5b2ab..969cde177880d4 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile @@ -2,4 +2,4 @@ # # Arch-specific network modules # -obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp64.o diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index b8fa6908fc5e7e..b34abfce15a65e 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -143,6 +143,12 @@ static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) ctx->seen |= 1 << (31 - i); } +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass); +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); + #endif #endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c new file mode 100644 index 00000000000000..efac899648733a --- /dev/null +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * eBPF JIT compiler + * + * Copyright 2016 Naveen N. 
Rao + * IBM Corporation + * + * Based on the powerpc classic BPF JIT compiler by Matt Evans + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_jit.h" + +static void bpf_jit_fill_ill_insns(void *area, unsigned int size) +{ + memset32(area, BREAKPOINT_INSTRUCTION, size / 4); +} + +/* Fix the branch target addresses for subprog calls */ +static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) +{ + const struct bpf_insn *insn = fp->insnsi; + bool func_addr_fixed; + u64 func_addr; + u32 tmp_idx; + int i, ret; + + for (i = 0; i < fp->len; i++) { + /* + * During the extra pass, only the branch target addresses for + * the subprog calls need to be fixed. All other instructions + * can left untouched. + * + * The JITed image length does not change because we already + * ensure that the JITed instruction sequence for these calls + * are of fixed length by padding them with NOPs. + */ + if (insn[i].code == (BPF_JMP | BPF_CALL) && + insn[i].src_reg == BPF_PSEUDO_CALL) { + ret = bpf_jit_get_func_addr(fp, &insn[i], true, + &func_addr, + &func_addr_fixed); + if (ret < 0) + return ret; + + /* + * Save ctx->idx as this would currently point to the + * end of the JITed image and set it to the offset of + * the instruction sequence corresponding to the + * subprog call temporarily. + */ + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + /* + * Restore ctx->idx here. This is safe as the length + * of the JITed sequence remains unchanged. + */ + ctx->idx = tmp_idx; + } + } + + return 0; +} + +struct powerpc64_jit_data { + struct bpf_binary_header *header; + u32 *addrs; + u8 *image; + u32 proglen; + struct codegen_context ctx; +}; + +bool bpf_jit_needs_zext(void) +{ + return true; +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) +{ + u32 proglen; + u32 alloclen; + u8 *image = NULL; + u32 *code_base; + u32 *addrs; + struct powerpc64_jit_data *jit_data; + struct codegen_context cgctx; + int pass; + int flen; + struct bpf_binary_header *bpf_hdr; + struct bpf_prog *org_fp = fp; + struct bpf_prog *tmp_fp; + bool bpf_blinded = false; + bool extra_pass = false; + + if (!fp->jit_requested) + return org_fp; + + tmp_fp = bpf_jit_blind_constants(org_fp); + if (IS_ERR(tmp_fp)) + return org_fp; + + if (tmp_fp != org_fp) { + bpf_blinded = true; + fp = tmp_fp; + } + + jit_data = fp->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + fp = org_fp; + goto out; + } + fp->aux->jit_data = jit_data; + } + + flen = fp->len; + addrs = jit_data->addrs; + if (addrs) { + cgctx = jit_data->ctx; + image = jit_data->image; + bpf_hdr = jit_data->header; + proglen = jit_data->proglen; + alloclen = proglen + FUNCTION_DESCR_SIZE; + extra_pass = true; + goto skip_init_ctx; + } + + addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL); + if (addrs == NULL) { + fp = org_fp; + goto out_addrs; + } + + memset(&cgctx, 0, sizeof(struct codegen_context)); + + /* Make sure that the stack is quadword aligned. */ + cgctx.stack_size = round_up(fp->aux->stack_depth, 16); + + /* Scouting faux-generate pass 0 */ + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + /* We hit something illegal or unsupported. */ + fp = org_fp; + goto out_addrs; + } + + /* + * If we have seen a tail call, we need a second pass. 
+ * This is because bpf_jit_emit_common_epilogue() is called + * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. + */ + if (cgctx.seen & SEEN_TAILCALL) { + cgctx.idx = 0; + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + fp = org_fp; + goto out_addrs; + } + } + + /* + * Pretend to build prologue, given the features we've seen. This will + * update ctgtx.idx as it pretends to output instructions, then we can + * calculate total size from idx. + */ + bpf_jit_build_prologue(0, &cgctx); + bpf_jit_build_epilogue(0, &cgctx); + + proglen = cgctx.idx * 4; + alloclen = proglen + FUNCTION_DESCR_SIZE; + + bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns); + if (!bpf_hdr) { + fp = org_fp; + goto out_addrs; + } + +skip_init_ctx: + code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); + + if (extra_pass) { + /* + * Do not touch the prologue and epilogue as they will remain + * unchanged. Only fix the branch target address for subprog + * calls in the body. + * + * This does not change the offsets and lengths of the subprog + * call instruction sequences and hence, the size of the JITed + * image as well. + */ + bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + + /* There is no need to perform the usual passes. */ + goto skip_codegen_passes; + } + + /* Code generation passes 1-2 */ + for (pass = 1; pass < 3; pass++) { + /* Now build the prologue, body code & epilogue for real. */ + cgctx.idx = 0; + bpf_jit_build_prologue(code_base, &cgctx); + bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass); + bpf_jit_build_epilogue(code_base, &cgctx); + + if (bpf_jit_enable > 1) + pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass, + proglen - (cgctx.idx * 4), cgctx.seen); + } + +skip_codegen_passes: + if (bpf_jit_enable > 1) + /* + * Note that we output the base address of the code_base + * rather than image, since opcodes are in code_base. + */ + bpf_jit_dump(flen, proglen, pass, code_base); + +#ifdef PPC64_ELF_ABI_v1 + /* Function descriptor nastiness: Address + TOC */ + ((u64 *)image)[0] = (u64)code_base; + ((u64 *)image)[1] = local_paca->kernel_toc; +#endif + + fp->bpf_func = (void *)image; + fp->jited = 1; + fp->jited_len = alloclen; + + bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + if (!fp->is_func || extra_pass) { + bpf_prog_fill_jited_linfo(fp, addrs); +out_addrs: + kfree(addrs); + kfree(jit_data); + fp->aux->jit_data = NULL; + } else { + jit_data->addrs = addrs; + jit_data->ctx = cgctx; + jit_data->proglen = proglen; + jit_data->image = image; + jit_data->header = bpf_hdr; + } + +out: + if (bpf_blinded) + bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); + + return fp; +} + +/* Overriding bpf_jit_free() as we don't set images read-only. 
*/ +void bpf_jit_free(struct bpf_prog *fp) +{ + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *bpf_hdr = (void *)addr; + + if (fp->jited) + bpf_jit_binary_free(bpf_hdr); + + bpf_prog_unlock_free(fp); +} diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 111451bc5cc099..8a1f9fb00e7805 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -18,11 +18,6 @@ #include "bpf_jit64.h" -static void bpf_jit_fill_ill_insns(void *area, unsigned int size) -{ - memset32(area, BREAKPOINT_INSTRUCTION, size/4); -} - static inline bool bpf_has_stack_frame(struct codegen_context *ctx) { /* @@ -69,7 +64,7 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) BUG(); } -static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; @@ -136,7 +131,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx } } -static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) { bpf_jit_emit_common_epilogue(image, ctx); @@ -171,8 +166,7 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, EMIT(PPC_RAW_BLRL()); } -static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, - u64 func) +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { unsigned int i, ctx_idx = ctx->idx; @@ -273,9 +267,8 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 } /* Assemble the body code between the prologue & epilogue */ -static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, - u32 *addrs, bool extra_pass) +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass) { const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; @@ -1010,249 +1003,3 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, return 0; } - -/* Fix the branch target addresses for subprog calls */ -static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, u32 *addrs) -{ - const struct bpf_insn *insn = fp->insnsi; - bool func_addr_fixed; - u64 func_addr; - u32 tmp_idx; - int i, ret; - - for (i = 0; i < fp->len; i++) { - /* - * During the extra pass, only the branch target addresses for - * the subprog calls need to be fixed. All other instructions - * can left untouched. - * - * The JITed image length does not change because we already - * ensure that the JITed instruction sequence for these calls - * are of fixed length by padding them with NOPs. - */ - if (insn[i].code == (BPF_JMP | BPF_CALL) && - insn[i].src_reg == BPF_PSEUDO_CALL) { - ret = bpf_jit_get_func_addr(fp, &insn[i], true, - &func_addr, - &func_addr_fixed); - if (ret < 0) - return ret; - - /* - * Save ctx->idx as this would currently point to the - * end of the JITed image and set it to the offset of - * the instruction sequence corresponding to the - * subprog call temporarily. - */ - tmp_idx = ctx->idx; - ctx->idx = addrs[i] / 4; - bpf_jit_emit_func_call_rel(image, ctx, func_addr); - - /* - * Restore ctx->idx here. This is safe as the length - * of the JITed sequence remains unchanged. 
- */ - ctx->idx = tmp_idx; - } - } - - return 0; -} - -struct powerpc64_jit_data { - struct bpf_binary_header *header; - u32 *addrs; - u8 *image; - u32 proglen; - struct codegen_context ctx; -}; - -bool bpf_jit_needs_zext(void) -{ - return true; -} - -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) -{ - u32 proglen; - u32 alloclen; - u8 *image = NULL; - u32 *code_base; - u32 *addrs; - struct powerpc64_jit_data *jit_data; - struct codegen_context cgctx; - int pass; - int flen; - struct bpf_binary_header *bpf_hdr; - struct bpf_prog *org_fp = fp; - struct bpf_prog *tmp_fp; - bool bpf_blinded = false; - bool extra_pass = false; - - if (!fp->jit_requested) - return org_fp; - - tmp_fp = bpf_jit_blind_constants(org_fp); - if (IS_ERR(tmp_fp)) - return org_fp; - - if (tmp_fp != org_fp) { - bpf_blinded = true; - fp = tmp_fp; - } - - jit_data = fp->aux->jit_data; - if (!jit_data) { - jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); - if (!jit_data) { - fp = org_fp; - goto out; - } - fp->aux->jit_data = jit_data; - } - - flen = fp->len; - addrs = jit_data->addrs; - if (addrs) { - cgctx = jit_data->ctx; - image = jit_data->image; - bpf_hdr = jit_data->header; - proglen = jit_data->proglen; - alloclen = proglen + FUNCTION_DESCR_SIZE; - extra_pass = true; - goto skip_init_ctx; - } - - addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL); - if (addrs == NULL) { - fp = org_fp; - goto out_addrs; - } - - memset(&cgctx, 0, sizeof(struct codegen_context)); - - /* Make sure that the stack is quadword aligned. */ - cgctx.stack_size = round_up(fp->aux->stack_depth, 16); - - /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { - /* We hit something illegal or unsupported. */ - fp = org_fp; - goto out_addrs; - } - - /* - * If we have seen a tail call, we need a second pass. - * This is because bpf_jit_emit_common_epilogue() is called - * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. - */ - if (cgctx.seen & SEEN_TAILCALL) { - cgctx.idx = 0; - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { - fp = org_fp; - goto out_addrs; - } - } - - /* - * Pretend to build prologue, given the features we've seen. This will - * update ctgtx.idx as it pretends to output instructions, then we can - * calculate total size from idx. - */ - bpf_jit_build_prologue(0, &cgctx); - bpf_jit_build_epilogue(0, &cgctx); - - proglen = cgctx.idx * 4; - alloclen = proglen + FUNCTION_DESCR_SIZE; - - bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, - bpf_jit_fill_ill_insns); - if (!bpf_hdr) { - fp = org_fp; - goto out_addrs; - } - -skip_init_ctx: - code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); - - if (extra_pass) { - /* - * Do not touch the prologue and epilogue as they will remain - * unchanged. Only fix the branch target address for subprog - * calls in the body. - * - * This does not change the offsets and lengths of the subprog - * call instruction sequences and hence, the size of the JITed - * image as well. - */ - bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); - - /* There is no need to perform the usual passes. */ - goto skip_codegen_passes; - } - - /* Code generation passes 1-2 */ - for (pass = 1; pass < 3; pass++) { - /* Now build the prologue, body code & epilogue for real. 
*/ - cgctx.idx = 0; - bpf_jit_build_prologue(code_base, &cgctx); - bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass); - bpf_jit_build_epilogue(code_base, &cgctx); - - if (bpf_jit_enable > 1) - pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass, - proglen - (cgctx.idx * 4), cgctx.seen); - } - -skip_codegen_passes: - if (bpf_jit_enable > 1) - /* - * Note that we output the base address of the code_base - * rather than image, since opcodes are in code_base. - */ - bpf_jit_dump(flen, proglen, pass, code_base); - -#ifdef PPC64_ELF_ABI_v1 - /* Function descriptor nastiness: Address + TOC */ - ((u64 *)image)[0] = (u64)code_base; - ((u64 *)image)[1] = local_paca->kernel_toc; -#endif - - fp->bpf_func = (void *)image; - fp->jited = 1; - fp->jited_len = alloclen; - - bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); - if (!fp->is_func || extra_pass) { - bpf_prog_fill_jited_linfo(fp, addrs); -out_addrs: - kfree(addrs); - kfree(jit_data); - fp->aux->jit_data = NULL; - } else { - jit_data->addrs = addrs; - jit_data->ctx = cgctx; - jit_data->proglen = proglen; - jit_data->image = image; - jit_data->header = bpf_hdr; - } - -out: - if (bpf_blinded) - bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); - - return fp; -} - -/* Overriding bpf_jit_free() as we don't set images read-only. */ -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *bpf_hdr = (void *)addr; - - if (fp->jited) - bpf_jit_binary_free(bpf_hdr); - - bpf_prog_unlock_free(fp); -} From c426810fcf9f96e3b43d16039e41ecb959f6dc29 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 22 Mar 2021 16:37:50 +0000 Subject: [PATCH 137/302] powerpc/bpf: Change values of SEEN_ flags Because PPC32 will use more non volatile registers, move SEEN_ flags to positions 0-2 which corresponds to special registers. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/608faa1dc3ecfead649e15392abd07b00313d2ba.1616430991.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index b34abfce15a65e..fb4656986fb940 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -108,18 +108,18 @@ static inline bool is_nearbranch(int offset) #define COND_LT (CR0_LT | COND_CMP_TRUE) #define COND_LE (CR0_GT | COND_CMP_FALSE) -#define SEEN_FUNC 0x1000 /* might call external helpers */ -#define SEEN_STACK 0x2000 /* uses BPF stack */ -#define SEEN_TAILCALL 0x4000 /* uses tail calls */ +#define SEEN_FUNC 0x20000000 /* might call external helpers */ +#define SEEN_STACK 0x40000000 /* uses BPF stack */ +#define SEEN_TAILCALL 0x80000000 /* uses tail calls */ struct codegen_context { /* * This is used to track register usage as well * as calls to external helpers. * - register usage is tracked with corresponding - * bits (r3-r10 and r27-r31) + * bits (r3-r31) * - rest of the bits can be used to track other - * things -- for now, we use bits 16 to 23 + * things -- for now, we use bits 0 to 2 * encoded in SEEN_* macros above */ unsigned int seen; From 355a8d26cd0416e7e764e4db766cf91e773a03e7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 22 Mar 2021 16:37:51 +0000 Subject: [PATCH 138/302] powerpc/asm: Add some opcodes in asm/ppc-opcode.h for PPC32 eBPF The following opcodes will be needed for the implementation of eBPF for PPC32. 
Add them in asm/ppc-opcode.h PPC_RAW_ADDE PPC_RAW_ADDZE PPC_RAW_ADDME PPC_RAW_MFLR PPC_RAW_ADDIC PPC_RAW_ADDIC_DOT PPC_RAW_SUBFC PPC_RAW_SUBFE PPC_RAW_SUBFIC PPC_RAW_SUBFZE PPC_RAW_ANDIS PPC_RAW_NOR Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f7bd573a368edd78006f8a5af508c726e7ce1ed2.1616430991.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ed161ef2b3ca68..5b60020dc1f43c 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -437,6 +437,9 @@ #define PPC_RAW_STFDX(s, a, b) (0x7c0005ae | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_LVX(t, a, b) (0x7c0000ce | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_STVX(s, a, b) (0x7c0001ce | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADDE(t, a, b) (0x7c000114 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADDZE(t, a) (0x7c000194 | ___PPC_RT(t) | ___PPC_RA(a)) +#define PPC_RAW_ADDME(t, a) (0x7c0001d4 | ___PPC_RT(t) | ___PPC_RA(a)) #define PPC_RAW_ADD(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) #define PPC_RAW_ADDC(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) @@ -445,11 +448,14 @@ #define PPC_RAW_BLR() (PPC_INST_BLR) #define PPC_RAW_BLRL() (0x4e800021) #define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r)) +#define PPC_RAW_MFLR(t) (PPC_INST_MFLR | ___PPC_RT(t)) #define PPC_RAW_BCTR() (PPC_INST_BCTR) #define PPC_RAW_MTCTR(r) (PPC_INST_MTCTR | ___PPC_RT(r)) #define PPC_RAW_ADDI(d, a, i) (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_LI(r, i) PPC_RAW_ADDI(r, 0, i) #define PPC_RAW_ADDIS(d, a, i) (PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_ADDIC(d, a, i) (0x30000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_ADDIC_DOT(d, a, i) (0x34000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_LIS(r, i) PPC_RAW_ADDIS(r, 0, i) #define PPC_RAW_STDX(r, base, b) (0x7c00012a | ___PPC_RS(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_STDU(r, base, i) (0xf8000001 | ___PPC_RS(r) | ___PPC_RA(base) | ((i) & 0xfffc)) @@ -472,6 +478,10 @@ #define PPC_RAW_CMPLW(a, b) (0x7c000040 | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_CMPLD(a, b) (0x7c200040 | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_SUB(d, a, b) (0x7c000050 | ___PPC_RT(d) | ___PPC_RB(a) | ___PPC_RA(b)) +#define PPC_RAW_SUBFC(d, a, b) (0x7c000010 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_SUBFE(d, a, b) (0x7c000110 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_SUBFIC(d, a, i) (0x20000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_SUBFZE(d, a) (0x7c000190 | ___PPC_RT(d) | ___PPC_RA(a)) #define PPC_RAW_MULD(d, a, b) (0x7c0001d2 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_MULW(d, a, b) (0x7c0001d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_MULHWU(d, a, b) (0x7c000016 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) @@ -484,11 +494,13 @@ #define PPC_RAW_DIVDEU_DOT(t, a, b) (0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) #define PPC_RAW_AND(d, a, b) (0x7c000038 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_ANDI(d, a, i) (0x70000000 | ___PPC_RA(d) | 
___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_ANDIS(d, a, i) (0x74000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) #define PPC_RAW_AND_DOT(d, a, b) (0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_OR(d, a, b) (0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_MR(d, a) PPC_RAW_OR(d, a, a) #define PPC_RAW_ORI(d, a, i) (PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) #define PPC_RAW_ORIS(d, a, i) (PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_NOR(d, a, b) (0x7c0000f8 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_XOR(d, a, b) (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_XORI(d, a, i) (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) #define PPC_RAW_XORIS(d, a, i) (0x6c000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) From 51c66ad849a703d9bbfd7704c941827aed0fd9fd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 22 Mar 2021 16:37:52 +0000 Subject: [PATCH 139/302] powerpc/bpf: Implement extended BPF on PPC32 Implement Extended Berkeley Packet Filter on Powerpc 32 Test result with test_bpf module: test_bpf: Summary: 378 PASSED, 0 FAILED, [354/366 JIT'ed] Registers mapping: [BPF_REG_0] = r11-r12 /* function arguments */ [BPF_REG_1] = r3-r4 [BPF_REG_2] = r5-r6 [BPF_REG_3] = r7-r8 [BPF_REG_4] = r9-r10 [BPF_REG_5] = r21-r22 (Args 9 and 10 come in via the stack) /* non volatile registers */ [BPF_REG_6] = r23-r24 [BPF_REG_7] = r25-r26 [BPF_REG_8] = r27-r28 [BPF_REG_9] = r29-r30 /* frame pointer aka BPF_REG_10 */ [BPF_REG_FP] = r17-r18 /* eBPF jit internal registers */ [BPF_REG_AX] = r19-r20 [TMP_REG] = r31 As PPC32 doesn't have a redzone in the stack, a stack frame must always be set in order to host at least the tail count counter. The stack frame remains for tail calls, it is set by the first callee and freed by the last callee. r0 is used as temporary register as much as possible. It is referenced directly in the code in order to avoid misusing it, because some instructions interpret it as value 0 instead of register r0 (ex: addi, addis, stw, lwz, ...) 
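For instance, the existing PPC_RAW_LI() and PPC_RAW_LIS() helpers in
asm/ppc-opcode.h rely on exactly that behaviour, encoding 0 in the RA
field so that addi/addis read it as the value 0 rather than as r0:

  #define PPC_RAW_LI(r, i)	PPC_RAW_ADDI(r, 0, i)
  #define PPC_RAW_LIS(r, i)	PPC_RAW_ADDIS(r, 0, i)

So r0 is only usable as a scratch register in places where the emitted
instruction really treats operand 0 as register r0.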
The following operations are not implemented: case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */ The following operations are only implemented for power of two constants: case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */ case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/61d8b149176ddf99e7d5cef0b6dc1598583ca202.1616430991.git.christophe.leroy@csgroup.eu --- Documentation/admin-guide/sysctl/net.rst | 2 +- arch/powerpc/Kconfig | 2 +- arch/powerpc/net/Makefile | 2 +- arch/powerpc/net/bpf_jit.h | 4 + arch/powerpc/net/bpf_jit_comp32.c | 1069 ++++++++++++++++++++++ 5 files changed, 1076 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/net/bpf_jit_comp32.c diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index f2ab8a5b6a4b86..685cc13f567bd4 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -64,6 +64,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on: - arm64 - arm32 - ppc64 + - ppc32 - sparc64 - mips64 - s390x @@ -73,7 +74,6 @@ two flavors of JITs, the newer eBPF JIT currently supported on: And the older cBPF JIT supported on the following archs: - mips - - ppc - sparc eBPF JITs are a superset of cBPF JITs, meaning the kernel will diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 29217437b8acf6..316ab0bf811283 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -202,7 +202,7 @@ config PPC select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL - select HAVE_EBPF_JIT if PPC64 + select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index 969cde177880d4..8e60af32e51e14 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile @@ -2,4 +2,4 @@ # # Arch-specific network modules # -obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp64.o +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index fb4656986fb940..a45b8266355da7 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -42,6 +42,10 @@ EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ } } while(0) +#ifdef CONFIG_PPC32 +#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0)) +#endif + #define PPC_LI64(d, i) do { \ if ((long)(i) >= -2147483648 && \ (long)(i) < 2147483648) \ diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c new file mode 100644 index 00000000000000..29ce802d753457 --- /dev/null +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -0,0 +1,1069 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * eBPF JIT compiler for PPC32 + * + * Copyright 2020 Christophe Leroy + * CS GROUP France + * + * Based on PPC64 eBPF JIT compiler by Naveen N. 
Rao + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_jit.h" + +/* + * Stack layout: + * + * [ prev sp ] <------------- + * [ nv gpr save area ] 16 * 4 | + * fp (r31) --> [ ebpf stack space ] upto 512 | + * [ frame header ] 16 | + * sp (r1) ---> [ stack pointer ] -------------- + */ + +/* for gpr non volatile registers r17 to r31 (14) + tail call */ +#define BPF_PPC_STACK_SAVE (15 * 4 + 4) +/* stack frame, ensure this is quadword aligned */ +#define BPF_PPC_STACKFRAME(ctx) (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size) + +/* BPF register usage */ +#define TMP_REG (MAX_BPF_JIT_REG + 0) + +/* BPF to ppc register mappings */ +static const int b2p[] = { + /* function return value */ + [BPF_REG_0] = 12, + /* function arguments */ + [BPF_REG_1] = 4, + [BPF_REG_2] = 6, + [BPF_REG_3] = 8, + [BPF_REG_4] = 10, + [BPF_REG_5] = 22, + /* non volatile registers */ + [BPF_REG_6] = 24, + [BPF_REG_7] = 26, + [BPF_REG_8] = 28, + [BPF_REG_9] = 30, + /* frame pointer aka BPF_REG_10 */ + [BPF_REG_FP] = 18, + /* eBPF jit internal registers */ + [BPF_REG_AX] = 20, + [TMP_REG] = 31, /* 32 bits */ +}; + +static int bpf_to_ppc(struct codegen_context *ctx, int reg) +{ + return b2p[reg]; +} + +/* PPC NVR range -- update this if we ever use NVRs below r17 */ +#define BPF_PPC_NVR_MIN 17 +#define BPF_PPC_TC 16 + +static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) +{ + if ((reg >= BPF_PPC_NVR_MIN && reg < 32) || reg == BPF_PPC_TC) + return BPF_PPC_STACKFRAME(ctx) - 4 * (32 - reg); + + WARN(true, "BPF JIT is asking about unknown registers, will crash the stack"); + /* Use the hole we have left for alignment */ + return BPF_PPC_STACKFRAME(ctx) - 4; +} + +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) +{ + int i; + + /* First arg comes in as a 32 bits pointer. */ + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), __REG_R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0)); + EMIT(PPC_RAW_STWU(__REG_R1, __REG_R1, -BPF_PPC_STACKFRAME(ctx))); + + /* + * Initialize tail_call_cnt in stack frame if we do tail calls. + * Otherwise, put in NOPs so that it can be skipped when we are + * invoked through a tail call. 
+ */ + if (ctx->seen & SEEN_TAILCALL) { + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + } else { + EMIT(PPC_RAW_NOP()); + } + +#define BPF_TAILCALL_PROLOGUE_SIZE 16 + + /* + * We need a stack frame, but we don't necessarily need to + * save/restore LR unless we call other functions + */ + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MFLR(__REG_R0)); + + /* + * Back up non-volatile regs -- registers r18-r31 + */ + for (i = BPF_PPC_NVR_MIN; i <= 31; i++) + if (bpf_is_seen_register(ctx, i)) + EMIT(PPC_RAW_STW(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); + + /* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/ + if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 8); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 12); + } + + /* Setup frame pointer to point to the bpf stack area */ + if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) { + EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), __REG_R1, + STACK_FRAME_MIN_SIZE + ctx->stack_size)); + } + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); +} + +static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) +{ + int i; + + /* Restore NVRs */ + for (i = BPF_PPC_NVR_MIN; i <= 31; i++) + if (bpf_is_seen_register(ctx, i)) + EMIT(PPC_RAW_LWZ(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); +} + +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +{ + EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_0))); + + bpf_jit_emit_common_epilogue(image, ctx); + + /* Tear down our stack frame */ + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + + EMIT(PPC_RAW_ADDI(__REG_R1, __REG_R1, BPF_PPC_STACKFRAME(ctx))); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MTLR(__REG_R0)); + + EMIT(PPC_RAW_BLR()); +} + +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +{ + /* Load function address into r0 */ + EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func))); + EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func))); + EMIT(PPC_RAW_MTLR(__REG_R0)); + EMIT(PPC_RAW_BLRL()); +} + +static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) +{ + /* + * By now, the eBPF program has already setup parameters in r3-r6 + * r3-r4/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program + * r5-r6/BPF_REG_2 - pointer to bpf_array + * r7-r8/BPF_REG_3 - index in bpf_array + */ + int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2); + int b2p_index = bpf_to_ppc(ctx, BPF_REG_3); + + /* + * if (index >= array->map.max_entries) + * goto out; + */ + EMIT(PPC_RAW_LWZ(__REG_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_CMPLW(b2p_index, __REG_R0)); + EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + PPC_BCC(COND_GE, out); + + /* + * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + */ + EMIT(PPC_RAW_CMPLWI(__REG_R0, MAX_TAIL_CALL_CNT)); + /* tail_call_cnt++; */ + EMIT(PPC_RAW_ADDIC(__REG_R0, __REG_R0, 1)); + PPC_BCC(COND_GT, out); + + /* prog = array->ptrs[index]; */ + EMIT(PPC_RAW_RLWINM(__REG_R3, b2p_index, 2, 0, 29)); + EMIT(PPC_RAW_ADD(__REG_R3, __REG_R3, b2p_bpf_array)); + EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_array, ptrs))); + 
EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + + /* + * if (prog == NULL) + * goto out; + */ + EMIT(PPC_RAW_CMPLWI(__REG_R3, 0)); + PPC_BCC(COND_EQ, out); + + /* goto *(prog->bpf_func + prologue_size); */ + EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_prog, bpf_func))); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + + EMIT(PPC_RAW_ADDIC(__REG_R3, __REG_R3, BPF_TAILCALL_PROLOGUE_SIZE)); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MTLR(__REG_R0)); + + EMIT(PPC_RAW_MTCTR(__REG_R3)); + + EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_1))); + + /* tear restore NVRs, ... */ + bpf_jit_emit_common_epilogue(image, ctx); + + EMIT(PPC_RAW_BCTR()); + /* out: */ +} + +/* Assemble the body code between the prologue & epilogue */ +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass) +{ + const struct bpf_insn *insn = fp->insnsi; + int flen = fp->len; + int i, ret; + + /* Start of epilogue code - will only be valid 2nd pass onwards */ + u32 exit_addr = addrs[flen]; + + for (i = 0; i < flen; i++) { + u32 code = insn[i].code; + u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg); + u32 dst_reg_h = dst_reg - 1; + u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); + u32 src_reg_h = src_reg - 1; + u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); + s16 off = insn[i].off; + s32 imm = insn[i].imm; + bool func_addr_fixed; + u64 func_addr; + u32 true_cond; + + /* + * addrs[] maps a BPF bytecode address into a real offset from + * the start of the body code. + */ + addrs[i] = ctx->idx * 4; + + /* + * As an optimization, we note down which registers + * are used so that we can only save/restore those in our + * prologue and epilogue. We do this here regardless of whether + * the actual BPF instruction uses src/dst registers or not + * (for instance, BPF_CALL does not use them). The expectation + * is that those instructions will have src_reg/dst_reg set to + * 0. Even otherwise, we just lose some prologue/epilogue + * optimization but everything else should work without + * any issues. 
+ */ + if (dst_reg >= 3 && dst_reg < 32) { + bpf_set_seen_register(ctx, dst_reg); + bpf_set_seen_register(ctx, dst_reg_h); + } + + if (src_reg >= 3 && src_reg < 32) { + bpf_set_seen_register(ctx, src_reg); + bpf_set_seen_register(ctx, src_reg_h); + } + + switch (code) { + /* + * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG + */ + case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */ + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */ + EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADDE(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */ + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */ + EMIT(PPC_RAW_SUBFC(dst_reg, src_reg, dst_reg)); + EMIT(PPC_RAW_SUBFE(dst_reg_h, src_reg_h, dst_reg_h)); + break; + case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ + imm = -imm; + fallthrough; + case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ + if (IMM_HA(imm) & 0xffff) + EMIT(PPC_RAW_ADDIS(dst_reg, dst_reg, IMM_HA(imm))); + if (IMM_L(imm)) + EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); + break; + case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ + imm = -imm; + fallthrough; + case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ + if (!imm) + break; + + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, __REG_R0)); + } + if (imm >= 0) + EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h)); + else + EMIT(PPC_RAW_ADDME(dst_reg_h, dst_reg_h)); + break; + case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_MULW(__REG_R0, dst_reg, src_reg_h)); + EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_MULHWU(tmp_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg)); + break; + case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */ + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, __REG_R0)); + } + break; + case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ + if (!imm) { + PPC_LI32(dst_reg, 0); + PPC_LI32(dst_reg_h, 0); + break; + } + if (imm == 1) + break; + if (imm == -1) { + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + break; + } + bpf_set_seen_register(ctx, tmp_reg); + PPC_LI32(tmp_reg, imm); + EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg)); + if (imm < 0) + EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg)); + EMIT(PPC_RAW_MULHWU(__REG_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + break; + case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ + EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(__REG_R0, src_reg, __REG_R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ + return -EOPNOTSUPP; + case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src 
*/ + return -EOPNOTSUPP; + case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ + if (!imm) + return -EINVAL; + if (imm == 1) + break; + + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ + if (!imm) + return -EINVAL; + + if (!is_power_of_2((u32)imm)) { + bpf_set_seen_register(ctx, tmp_reg); + PPC_LI32(tmp_reg, imm); + EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(__REG_R0, tmp_reg, __REG_R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + break; + } + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2((u32)imm), 31)); + + break; + case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */ + if (!imm) + return -EINVAL; + if (imm < 0) + imm = -imm; + if (!is_power_of_2(imm)) + return -EOPNOTSUPP; + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2(imm), 31)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ + if (!imm) + return -EINVAL; + if (!is_power_of_2(abs(imm))) + return -EOPNOTSUPP; + + if (imm < 0) { + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + imm = -imm; + } + if (imm == 1) + break; + imm = ilog2(imm); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm)); + break; + case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */ + EMIT(PPC_RAW_NEG(dst_reg, dst_reg)); + break; + case BPF_ALU64 | BPF_NEG: /* dst = -dst */ + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + break; + + /* + * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH + */ + case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */ + EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_AND(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */ + EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ + if (imm >= 0) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + fallthrough; + case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */ + if (!IMM_H(imm)) { + EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); + } else if (!IMM_L(imm)) { + EMIT(PPC_RAW_ANDIS(dst_reg, dst_reg, IMM_H(imm))); + } else if (imm == (((1 << fls(imm)) - 1) ^ ((1 << (ffs(i) - 1)) - 1))) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, + 32 - fls(imm), 32 - ffs(imm))); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, __REG_R0)); + } + break; + case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ + EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ + EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ + /* Sign-extended */ + if (imm < 0) + EMIT(PPC_RAW_LI(dst_reg_h, -1)); + fallthrough; + case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */ + if (IMM_L(imm)) + EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); + if (IMM_H(imm)) + EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm))); + break; + case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */ + if (dst_reg == src_reg) { + EMIT(PPC_RAW_LI(dst_reg, 0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } else { + 
EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_XOR(dst_reg_h, dst_reg_h, src_reg_h)); + } + break; + case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */ + if (dst_reg == src_reg) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ + if (imm < 0) + EMIT(PPC_RAW_NOR(dst_reg_h, dst_reg_h, dst_reg_h)); + fallthrough; + case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */ + if (IMM_L(imm)) + EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); + if (IMM_H(imm)) + EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm))); + break; + case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ + EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ + EMIT(PPC_RAW_ADDIC_DOT(__REG_R0, src_reg, -32)); + PPC_BCC_SHORT(COND_LT, (ctx->idx + 4) * 4); + EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg, __REG_R0)); + EMIT(PPC_RAW_LI(dst_reg, 0)); + PPC_JMP((ctx->idx + 6) * 4); + EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0)); + EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0)); + break; + case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */ + if (!imm) + break; + EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, imm, 0, 31 - imm)); + EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31)); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, imm, 0, 31 - imm)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg, imm, 0, 31 - imm)); + else + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + EMIT(PPC_RAW_LI(dst_reg, 0)); + break; + case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ + EMIT(PPC_RAW_ADDIC_DOT(__REG_R0, src_reg, -32)); + PPC_BCC_SHORT(COND_LT, (ctx->idx + 4) * 4); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + PPC_JMP((ctx->idx + 6) * 4); + EMIT(PPC_RAW_SUBFIC(0, src_reg, 32)); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ + if (!imm) + break; + EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, 32 - imm, imm, 31)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg_h, 64 - imm, imm - 32, 31)); + else + EMIT(PPC_RAW_LI(dst_reg, 0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */ + EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ + EMIT(PPC_RAW_ADDIC_DOT(__REG_R0, src_reg, -32)); + PPC_BCC_SHORT(COND_LT, (ctx->idx + 4) * 4); + EMIT(PPC_RAW_SRAW(dst_reg, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, 
dst_reg_h, 31)); + PPC_JMP((ctx->idx + 6) * 4); + EMIT(PPC_RAW_SUBFIC(0, src_reg, 32)); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */ + if (!imm) + break; + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, imm - 32)); + else + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, 31)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, 31)); + break; + + /* + * MOV + */ + case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ + if (dst_reg == src_reg) + break; + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */ + /* special mov32 for zext */ + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + else if (dst_reg != src_reg) + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */ + PPC_LI32(dst_reg, imm); + PPC_EX32(dst_reg_h, imm); + break; + case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ + PPC_LI32(dst_reg, imm); + break; + + /* + * BPF_FROM_BE/LE + */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm) { + case 16: + /* Copy 16 bits to upper part */ + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg, 16, 0, 15)); + /* Rotate 8 bits right & mask */ + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 24, 16, 31)); + break; + case 32: + /* + * Rotate word left by 8 bits: + * 2 bytes are already in their final position + * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) + */ + EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg, 8, 0, 31)); + /* Rotate 24 bits and insert byte 1 */ + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 0, 7)); + /* Rotate 24 bits and insert byte 3 */ + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + break; + case 64: + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg_h, 8, 0, 31)); + /* Rotate 24 bits and insert byte 1 */ + EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 0, 7)); + /* Rotate 24 bits and insert byte 3 */ + EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg)); + break; + } + break; + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (imm) { + case 16: + /* zero-extend 16 bits into 32 bits */ + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 16, 31)); + break; + case 32: + case 64: + /* nop */ + break; + } + break; + + /* + * BPF_ST(X) + */ + case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ + EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STB(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ + EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ + 
PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STH(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ + EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ + EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off)); + EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4)); + break; + case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off + 4)); + PPC_EX32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + break; + + /* + * BPF_STX XADD (atomic_add) + */ + case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */ + bpf_set_seen_register(ctx, tmp_reg); + /* Get offset into TMP_REG */ + EMIT(PPC_RAW_LI(tmp_reg, off)); + /* load value from memory into r0 */ + EMIT(PPC_RAW_LWARX(__REG_R0, tmp_reg, dst_reg, 0)); + /* add value from src_reg into this */ + EMIT(PPC_RAW_ADD(__REG_R0, __REG_R0, src_reg)); + /* store result back */ + EMIT(PPC_RAW_STWCX(__REG_R0, tmp_reg, dst_reg)); + /* we're done if this succeeded */ + PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4); + break; + + case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */ + return -EOPNOTSUPP; + + /* + * BPF_LDX + */ + case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ + EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); + break; + + /* + * Doubleword load + * 16 byte instruction that uses two 'struct bpf_insn' + */ + case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ + PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm); + PPC_LI32(dst_reg, (u32)insn[i].imm); + /* Adjust for two bpf instructions */ + addrs[++i] = ctx->idx * 4; + break; + + /* + * Return/Exit + */ + case BPF_JMP | BPF_EXIT: + /* + * If this isn't the very last instruction, branch to + * the epilogue. If we _are_ the last instruction, + * we'll just fall through to the epilogue. 
+ */ + if (i != flen - 1) + PPC_JMP(exit_addr); + /* else fall through to the epilogue */ + break; + + /* + * Call kernel helper or bpf function + */ + case BPF_JMP | BPF_CALL: + ctx->seen |= SEEN_FUNC; + + ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + + if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, 8)); + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, 12)); + } + + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, __REG_R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), __REG_R4)); + break; + + /* + * Jumps and branches + */ + case BPF_JMP | BPF_JA: + PPC_JMP(addrs[i + 1 + off]); + break; + + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_X: + true_cond = COND_GT; + goto cond_branch; + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_X: + true_cond = COND_LT; + goto cond_branch; + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_X: + true_cond = COND_GE; + goto cond_branch; + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_X: + true_cond = COND_LE; + goto cond_branch; + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_X: + true_cond = COND_EQ; + goto cond_branch; + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_X: + true_cond = COND_NE; + goto cond_branch; + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_X: + true_cond = COND_NE; + /* fallthrough; */ + +cond_branch: + switch (code) { + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + /* unsigned comparison */ + EMIT(PPC_RAW_CMPLW(dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + /* unsigned comparison */ + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + /* signed comparison */ + 
EMIT(PPC_RAW_CMPW(dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + /* signed comparison */ + EMIT(PPC_RAW_CMPW(dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JSET | BPF_X: + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JSET | BPF_X: { + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + /* + * Need sign-extended load, so only positive + * values can be used as imm in cmplwi + */ + if (imm >= 0 && imm < 32768) { + EMIT(PPC_RAW_CMPLWI(dst_reg_h, 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + /* sign-extending load ... but unsigned comparison */ + PPC_EX32(__REG_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg_h, __REG_R0)); + PPC_LI32(__REG_R0, imm); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + if (imm >= 0 && imm < 65536) { + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + } + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + if (imm >= 0 && imm < 65536) { + EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + /* sign-extending load */ + EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? 
-1 : 0)); + PPC_LI32(__REG_R0, imm); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + /* + * signed comparison, so any 16-bit value + * can be used in cmpwi + */ + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_CMPWI(dst_reg, imm)); + } else { + /* sign-extending load */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_CMPW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP | BPF_JSET | BPF_K: + /* andi does not sign-extend the immediate */ + if (imm >= 0 && imm < 32768) { + /* PPC_ANDI is _only/always_ dot-form */ + EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + if (imm < 0) { + EMIT(PPC_RAW_CMPWI(dst_reg_h, 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + } + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JSET | BPF_K: + /* andi does not sign-extend the immediate */ + if (imm >= -32768 && imm < 32768) { + /* PPC_ANDI is _only/always_ dot-form */ + EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + } + break; + } + PPC_BCC(true_cond, addrs[i + 1 + off]); + break; + + /* + * Tail call + */ + case BPF_JMP | BPF_TAIL_CALL: + ctx->seen |= SEEN_TAILCALL; + bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + break; + + default: + /* + * The filter contains something cruel & unusual. + * We don't handle it, but also there shouldn't be + * anything missing from our list. + */ + pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", code, i); + return -EOPNOTSUPP; + } + if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext && + !insn_is_zext(&insn[i + 1])) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } + + /* Set end-of-body-code address for exit. */ + addrs[i] = ctx->idx * 4; + + return 0; +} From 40272035e1d0edcd515ad45be297c4cce044536d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 22 Mar 2021 16:37:53 +0000 Subject: [PATCH 140/302] powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC32 When the BPF routine doesn't call any function, the non volatile registers can be reallocated to volatile registers in order to avoid having to save them/restore on the stack. 
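This is only done when SEEN_FUNC is not set: the volatile registers
(r3-r12 on PPC32) keep their content as long as no function is called,
so remapping for instance

  [BPF_REG_6] = r23-r24   ==>   [BPF_REG_6] = r5-r6

costs nothing, whereas a non volatile pair has to be saved in the
prologue and restored in the epilogue. The effect is visible in the
before/after disassembly below.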
Before this patch, the test #359 ADD default X is: 0: 7c 64 1b 78 mr r4,r3 4: 38 60 00 00 li r3,0 8: 94 21 ff b0 stwu r1,-80(r1) c: 60 00 00 00 nop 10: 92 e1 00 2c stw r23,44(r1) 14: 93 01 00 30 stw r24,48(r1) 18: 93 21 00 34 stw r25,52(r1) 1c: 93 41 00 38 stw r26,56(r1) 20: 39 80 00 00 li r12,0 24: 39 60 00 00 li r11,0 28: 3b 40 00 00 li r26,0 2c: 3b 20 00 00 li r25,0 30: 7c 98 23 78 mr r24,r4 34: 7c 77 1b 78 mr r23,r3 38: 39 80 00 42 li r12,66 3c: 39 60 00 00 li r11,0 40: 7d 8c d2 14 add r12,r12,r26 44: 39 60 00 00 li r11,0 48: 7d 83 63 78 mr r3,r12 4c: 82 e1 00 2c lwz r23,44(r1) 50: 83 01 00 30 lwz r24,48(r1) 54: 83 21 00 34 lwz r25,52(r1) 58: 83 41 00 38 lwz r26,56(r1) 5c: 38 21 00 50 addi r1,r1,80 60: 4e 80 00 20 blr After this patch, the same test has become: 0: 7c 64 1b 78 mr r4,r3 4: 38 60 00 00 li r3,0 8: 94 21 ff b0 stwu r1,-80(r1) c: 60 00 00 00 nop 10: 39 80 00 00 li r12,0 14: 39 60 00 00 li r11,0 18: 39 00 00 00 li r8,0 1c: 38 e0 00 00 li r7,0 20: 7c 86 23 78 mr r6,r4 24: 7c 65 1b 78 mr r5,r3 28: 39 80 00 42 li r12,66 2c: 39 60 00 00 li r11,0 30: 7d 8c 42 14 add r12,r12,r8 34: 39 60 00 00 li r11,0 38: 7d 83 63 78 mr r3,r12 3c: 38 21 00 50 addi r1,r1,80 40: 4e 80 00 20 blr Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b94562d7d2bb21aec89de0c40bb3cd91054b65a2.1616430991.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit.h | 16 ++++++++++++++++ arch/powerpc/net/bpf_jit64.h | 2 +- arch/powerpc/net/bpf_jit_comp.c | 2 ++ arch/powerpc/net/bpf_jit_comp32.c | 30 ++++++++++++++++++++++++++++-- arch/powerpc/net/bpf_jit_comp64.c | 4 ++++ 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index a45b8266355da7..776abef4d2a0a7 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -116,6 +116,15 @@ static inline bool is_nearbranch(int offset) #define SEEN_STACK 0x40000000 /* uses BPF stack */ #define SEEN_TAILCALL 0x80000000 /* uses tail calls */ +#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ +#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */ + +#ifdef CONFIG_PPC64 +extern const int b2p[MAX_BPF_JIT_REG + 2]; +#else +extern const int b2p[MAX_BPF_JIT_REG + 1]; +#endif + struct codegen_context { /* * This is used to track register usage as well @@ -129,6 +138,7 @@ struct codegen_context { unsigned int seen; unsigned int idx; unsigned int stack_size; + int b2p[ARRAY_SIZE(b2p)]; }; static inline void bpf_flush_icache(void *start, void *end) @@ -147,11 +157,17 @@ static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) ctx->seen |= 1 << (31 - i); } +static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) +{ + ctx->seen &= ~(1 << (31 - i)); +} + void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, u32 *addrs, bool extra_pass); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); +void bpf_jit_realloc_regs(struct codegen_context *ctx); #endif diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index b05f2e67bba140..7b713edfa7e261 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -39,7 +39,7 @@ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* BPF to ppc register mappings */ -static const int b2p[] = { +const int 
b2p[MAX_BPF_JIT_REG + 2] = { /* function return value */ [BPF_REG_0] = 8, /* function arguments */ diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index efac899648733a..798ac4350a82c5 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -143,6 +143,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) } memset(&cgctx, 0, sizeof(struct codegen_context)); + memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p)); /* Make sure that the stack is quadword aligned. */ cgctx.stack_size = round_up(fp->aux->stack_depth, 16); @@ -167,6 +168,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) } } + bpf_jit_realloc_regs(&cgctx); /* * Pretend to build prologue, given the features we've seen. This will * update ctgtx.idx as it pretends to output instructions, then we can diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 29ce802d753457..003843273b43ee 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -37,7 +37,7 @@ #define TMP_REG (MAX_BPF_JIT_REG + 0) /* BPF to ppc register mappings */ -static const int b2p[] = { +const int b2p[MAX_BPF_JIT_REG + 1] = { /* function return value */ [BPF_REG_0] = 12, /* function arguments */ @@ -60,7 +60,7 @@ static const int b2p[] = { static int bpf_to_ppc(struct codegen_context *ctx, int reg) { - return b2p[reg]; + return ctx->b2p[reg]; } /* PPC NVR range -- update this if we ever use NVRs below r17 */ @@ -77,6 +77,32 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) return BPF_PPC_STACKFRAME(ctx) - 4; } +void bpf_jit_realloc_regs(struct codegen_context *ctx) +{ + if (ctx->seen & SEEN_FUNC) + return; + + while (ctx->seen & SEEN_NVREG_MASK && + (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) { + int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab)); + int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa)); + int i; + + for (i = BPF_REG_0; i <= TMP_REG; i++) { + if (ctx->b2p[i] != old) + continue; + ctx->b2p[i] = new; + bpf_set_seen_register(ctx, new); + bpf_clear_seen_register(ctx, old); + if (i != TMP_REG) { + bpf_set_seen_register(ctx, new - 1); + bpf_clear_seen_register(ctx, old - 1); + } + break; + } + } +} + void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 8a1f9fb00e7805..57a8c1153851a0 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -64,6 +64,10 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) BUG(); } +void bpf_jit_realloc_regs(struct codegen_context *ctx) +{ +} + void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; From b0b3b2c78ec075cec4721986a95abbbac8c3da4f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 23 Mar 2021 15:47:59 +0000 Subject: [PATCH 141/302] powerpc: Switch to relative jump labels Convert powerpc to relative jump labels. Before the patch, pseries_defconfig vmlinux.o has: 9074 __jump_table 0003f2a0 0000000000000000 0000000000000000 01321fa8 2**0 With the patch, the same config gets: 9074 __jump_table 0002a0e0 0000000000000000 0000000000000000 01321fb4 2**0 Size is 258720 without the patch, 172256 with the patch. That's a 33% size reduction. 
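The saving comes from the entry layout: with absolute references each
__jump_table entry on PPC64 is three 64-bit words (code, target, key),
i.e. 24 bytes, while the generic relative layout selected by
HAVE_ARCH_JUMP_LABEL_RELATIVE is roughly

  struct jump_entry {
  	s32 code;	/* branch site, as an offset from the entry */
  	s32 target;	/* branch target, as an offset from the entry */
  	long key;	/* static_key, as an offset from the entry */
  };

i.e. 16 bytes per entry on 64-bit, hence the roughly one third smaller
section.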
Largely copied from commit c296146c058c ("arm64/kernel: jump_label: Switch to relative references") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/828348da7868eda953ce023994404dfc49603b64.1616514473.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/jump_label.h | 21 ++++++--------------- arch/powerpc/kernel/jump_label.c | 4 ++-- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 316ab0bf811283..048e2b2a5c638e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -184,6 +184,7 @@ config PPC select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KGDB diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 09297ec9fa5271..2d5c6bec2b4f33 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -20,7 +20,8 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran asm_volatile_goto("1:\n\t" "nop # arch_static_branch\n\t" ".pushsection __jump_table, \"aw\"\n\t" - JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" + ".long 1b - ., %l[l_yes] - .\n\t" + JUMP_ENTRY_TYPE "%c0 - .\n\t" ".popsection \n\t" : : "i" (&((char *)key)[branch]) : : l_yes); @@ -34,7 +35,8 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool asm_volatile_goto("1:\n\t" "b %l[l_yes] # arch_static_branch_jump\n\t" ".pushsection __jump_table, \"aw\"\n\t" - JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" + ".long 1b - ., %l[l_yes] - .\n\t" + JUMP_ENTRY_TYPE "%c0 - .\n\t" ".popsection \n\t" : : "i" (&((char *)key)[branch]) : : l_yes); @@ -43,23 +45,12 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool return true; } -#ifdef CONFIG_PPC64 -typedef u64 jump_label_t; -#else -typedef u32 jump_label_t; -#endif - -struct jump_entry { - jump_label_t code; - jump_label_t target; - jump_label_t key; -}; - #else #define ARCH_STATIC_BRANCH(LABEL, KEY) \ 1098: nop; \ .pushsection __jump_table, "aw"; \ - FTR_ENTRY_LONG 1098b, LABEL, KEY; \ + .long 1098b - ., LABEL - .; \ + FTR_ENTRY_LONG KEY; \ .popsection #endif diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index 144858027fa38f..ce87dc5ea23cfa 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -11,10 +11,10 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - struct ppc_inst *addr = (struct ppc_inst *)(unsigned long)entry->code; + struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry); if (type == JUMP_LABEL_JMP) - patch_branch(addr, entry->target, 0); + patch_branch(addr, jump_entry_target(entry), 0); else patch_instruction(addr, ppc_inst(PPC_INST_NOP)); } From 4763d37827643750a39a8c7a9205928c09618a6f Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Wed, 20 Jan 2021 19:50:21 +0530 Subject: [PATCH 142/302] powerpc: Spelling/typo fixes Various spelling/typo fixes. 
Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpm2.h | 2 +- arch/powerpc/kernel/head_8xx.S | 2 +- arch/powerpc/mm/book3s64/radix_pgtable.c | 2 +- drivers/macintosh/windfarm_smu_controls.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h index 2211b934ecb4ea..bda45788cfccac 100644 --- a/arch/powerpc/include/asm/cpm2.h +++ b/arch/powerpc/include/asm/cpm2.h @@ -594,7 +594,7 @@ typedef struct fcc_enet { uint fen_p256c; /* Total packets 256 < bytes <= 511 */ uint fen_p512c; /* Total packets 512 < bytes <= 1023 */ uint fen_p1024c; /* Total packets 1024 < bytes <= 1518 */ - uint fen_cambuf; /* Internal CAM buffer poiner */ + uint fen_cambuf; /* Internal CAM buffer pointer */ ushort fen_rfthr; /* Received frames threshold */ ushort fen_rfcnt; /* Received frames count */ } fcc_enet_t; diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 34feb628c88d29..e3b066703eab28 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -804,7 +804,7 @@ EXPORT_SYMBOL(empty_zero_page) swapper_pg_dir: .space PGD_TABLE_SIZE -/* Room for two PTE table poiners, usually the kernel and current user +/* Room for two PTE table pointers, usually the kernel and current user * pointer to their respective root page table (pgdir). */ .globl abatron_pteptrs diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 98f0b243c1ab21..8b8f1451e94457 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1058,7 +1058,7 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, * Book3S does not require a TLB flush when relaxing access * restrictions when the address space is not attached to a * NMMU, because the core MMU will reload the pte after taking - * an access fault, which is defined by the architectue. + * an access fault, which is defined by the architecture. */ } /* See ptesync comment in radix__set_pte_at */ diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c index 79cb1ad09bfdef..75966052819a0f 100644 --- a/drivers/macintosh/windfarm_smu_controls.c +++ b/drivers/macintosh/windfarm_smu_controls.c @@ -94,7 +94,7 @@ static int smu_set_fan(int pwm, u8 id, u16 value) return rc; wait_for_completion(&comp); - /* Handle fallback (see coment above) */ + /* Handle fallback (see comment above) */ if (cmd.status != 0 && smu_supports_new_fans_ops) { printk(KERN_WARNING "windfarm: SMU failed new fan command " "falling back to old method\n"); From b8b2f37cf632434456182e9002d63cbc4cccc50c Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Mon, 8 Feb 2021 14:29:56 +1100 Subject: [PATCH 143/302] powerpc/64s: Fix pte update for kernel memory on radix When adding a PTE a ptesync is needed to order the update of the PTE with subsequent accesses otherwise a spurious fault may be raised. radix__set_pte_at() does not do this for performance gains. For non-kernel memory this is not an issue as any faults of this kind are corrected by the page fault handler. For kernel memory these faults are not handled. The current solution is that there is a ptesync in flush_cache_vmap() which should be called when mapping from the vmalloc region. However, map_kernel_page() does not call flush_cache_vmap(). This is troublesome in particular for code patching with Strict RWX on radix. 
In do_patch_instruction() the page frame that contains the instruction to be patched is mapped and then immediately patched. With no ordering or synchronization between setting up the PTE and writing to the page it is possible for faults. As the code patching is done using __put_user_asm_goto() the resulting fault is obscured - but using a normal store instead it can be seen: BUG: Unable to handle kernel data access on write at 0xc008000008f24a3c Faulting instruction address: 0xc00000000008bd74 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: nop_module(PO+) [last unloaded: nop_module] CPU: 4 PID: 757 Comm: sh Tainted: P O 5.10.0-rc5-01361-ge3c1b78c8440-dirty #43 NIP: c00000000008bd74 LR: c00000000008bd50 CTR: c000000000025810 REGS: c000000016f634a0 TRAP: 0300 Tainted: P O (5.10.0-rc5-01361-ge3c1b78c8440-dirty) MSR: 9000000000009033 CR: 44002884 XER: 00000000 CFAR: c00000000007c68c DAR: c008000008f24a3c DSISR: 42000000 IRQMASK: 1 This results in the kind of issue reported here: https://lore.kernel.org/linuxppc-dev/15AC5B0E-A221-4B8C-9039-FA96B8EF7C88@lca.pw/ Chris Riedl suggested a reliable way to reproduce the issue: $ mount -t debugfs none /sys/kernel/debug $ (while true; do echo function > /sys/kernel/debug/tracing/current_tracer ; echo nop > /sys/kernel/debug/tracing/current_tracer ; done) & Turning ftrace on and off does a large amount of code patching which in usually less then 5min will crash giving a trace like: ftrace-powerpc: (____ptrval____): replaced (4b473b11) != old (60000000) ------------[ ftrace bug ]------------ ftrace failed to modify [] napi_busy_loop+0xc/0x390 actual: 11:3b:47:4b Setting ftrace call site to call ftrace function ftrace record flags: 80000001 (1) expected tramp: c00000000006c96c ------------[ cut here ]------------ WARNING: CPU: 4 PID: 809 at kernel/trace/ftrace.c:2065 ftrace_bug+0x28c/0x2e8 Modules linked in: nop_module(PO-) [last unloaded: nop_module] CPU: 4 PID: 809 Comm: sh Tainted: P O 5.10.0-rc5-01360-gf878ccaf250a #1 NIP: c00000000024f334 LR: c00000000024f330 CTR: c0000000001a5af0 REGS: c000000004c8b760 TRAP: 0700 Tainted: P O (5.10.0-rc5-01360-gf878ccaf250a) MSR: 900000000282b033 CR: 28008848 XER: 20040000 CFAR: c0000000001a9c98 IRQMASK: 0 GPR00: c00000000024f330 c000000004c8b9f0 c000000002770600 0000000000000022 GPR04: 00000000ffff7fff c000000004c8b6d0 0000000000000027 c0000007fe9bcdd8 GPR08: 0000000000000023 ffffffffffffffd8 0000000000000027 c000000002613118 GPR12: 0000000000008000 c0000007fffdca00 0000000000000000 0000000000000000 GPR16: 0000000023ec37c5 0000000000000000 0000000000000000 0000000000000008 GPR20: c000000004c8bc90 c0000000027a2d20 c000000004c8bcd0 c000000002612fe8 GPR24: 0000000000000038 0000000000000030 0000000000000028 0000000000000020 GPR28: c000000000ff1b68 c000000000bf8e5c c00000000312f700 c000000000fbb9b0 NIP ftrace_bug+0x28c/0x2e8 LR ftrace_bug+0x288/0x2e8 Call Trace: ftrace_bug+0x288/0x2e8 (unreliable) ftrace_modify_all_code+0x168/0x210 arch_ftrace_update_code+0x18/0x30 ftrace_run_update_code+0x44/0xc0 ftrace_startup+0xf8/0x1c0 register_ftrace_function+0x4c/0xc0 function_trace_init+0x80/0xb0 tracing_set_tracer+0x2a4/0x4f0 tracing_set_trace_write+0xd4/0x130 vfs_write+0xf0/0x330 ksys_write+0x84/0x140 system_call_exception+0x14c/0x230 system_call_common+0xf0/0x27c To fix this when updating kernel memory PTEs using ptesync. 
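In short, the required update sequence for a kernel PTE is:

    set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
    /* Order the PTE update before any subsequent access through the
     * new mapping, per the ISA translation table update requirements. */
    asm volatile("ptesync" : : : "memory");

replacing the smp_wmb() that the kernel mapping paths used previously.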
Fixes: f1cb8f9beba8 ("powerpc/64s/radix: avoid ptesync after set_pte and ptep_set_access_flags") Signed-off-by: Jordan Niethe Reviewed-by: Nicholas Piggin [mpe: Tidy up change log slightly] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210208032957.1232102-1-jniethe5@gmail.com --- arch/powerpc/include/asm/book3s/64/radix.h | 6 ++++-- arch/powerpc/mm/book3s64/radix_pgtable.c | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index c7813dc628fc93..59cab558e2f057 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -222,8 +222,10 @@ static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr, * from ptesync, it should probably go into update_mmu_cache, rather * than set_pte_at (which is used to set ptes unrelated to faults). * - * Spurious faults to vmalloc region are not tolerated, so there is - * a ptesync in flush_cache_vmap. + * Spurious faults from the kernel memory are not tolerated, so there + * is a ptesync in flush_cache_vmap, and __map_kernel_page() follows + * the pte update sequence from ISA Book III 6.10 Translation Table + * Update Synchronization Requirements. */ } diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 8b8f1451e94457..55f26c0e389eb2 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -108,7 +108,7 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } @@ -168,7 +168,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } From 29e3ea8cbd2958cf237b84652ec236803f2c6202 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Mon, 8 Feb 2021 14:29:57 +1100 Subject: [PATCH 144/302] selftests/powerpc: Test for spurious kernel memory faults on radix Previously when mapping kernel memory on radix, no ptesync was included which would periodically lead to unhandled spurious faults. Mapping kernel memory is used when code patching with Strict RWX enabled. As suggested by Chris Riedl, turning ftrace on and off does a large amount of code patching so is a convenient way to see this kind of fault. Add a selftest to try and trigger this kind of a spurious fault. It tests for 30 seconds which is usually long enough for the issue to show up. 
Signed-off-by: Jordan Niethe [mpe: Rename it to better reflect what it does, rather than the symptom] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210208032957.1232102-2-jniethe5@gmail.com --- tools/testing/selftests/powerpc/mm/Makefile | 1 + .../powerpc/mm/stress_code_patching.sh | 49 +++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100755 tools/testing/selftests/powerpc/mm/stress_code_patching.sh diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index defe488d6bf111..40253abc62089b 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -5,6 +5,7 @@ noarg: TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ large_vm_fork_separation bad_accesses pkey_exec_prot \ pkey_siginfo stack_expansion_signal stack_expansion_ldst +TEST_PROGS := stress_code_patching.sh TEST_GEN_PROGS_EXTENDED := tlbie_test TEST_GEN_FILES := tempfile diff --git a/tools/testing/selftests/powerpc/mm/stress_code_patching.sh b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh new file mode 100755 index 00000000000000..e454509659f6b4 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +TIMEOUT=30 + +DEBUFS_DIR=`cat /proc/mounts | grep debugfs | awk '{print $2}'` +if [ ! -e "$DEBUFS_DIR" ] +then + echo "debugfs not found, skipping" 1>&2 + exit 4 +fi + +if [ ! -e "$DEBUFS_DIR/tracing/current_tracer" ] +then + echo "Tracing files not found, skipping" 1>&2 + exit 4 +fi + + +echo "Testing for spurious faults when mapping kernel memory..." + +if grep -q "FUNCTION TRACING IS CORRUPTED" "$DEBUFS_DIR/tracing/trace" +then + echo "FAILED: Ftrace already dead. Probably due to a spurious fault" 1>&2 + exit 1 +fi + +dmesg -C +START_TIME=`date +%s` +END_TIME=`expr $START_TIME + $TIMEOUT` +while [ `date +%s` -lt $END_TIME ] +do + echo function > $DEBUFS_DIR/tracing/current_tracer + echo nop > $DEBUFS_DIR/tracing/current_tracer + if dmesg | grep -q 'ftrace bug' + then + break + fi +done + +echo nop > $DEBUFS_DIR/tracing/current_tracer +if dmesg | grep -q 'ftrace bug' +then + echo "FAILED: Mapping kernel memory causes spurious faults" 1>&2 + exit 1 +else + echo "OK: Mapping kernel memory does not cause spurious faults" + exit 0 +fi From 56bec2f9d4d05675cada96772a8a93010f4d82bf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 31 Mar 2021 11:38:40 +1100 Subject: [PATCH 145/302] powerpc/mm/64s: Add _PAGE_KERNEL_ROX In the past we had a fallback definition for _PAGE_KERNEL_ROX, but we removed that in commit d82fd29c5a8c ("powerpc/mm: Distribute platform specific PAGE and PMD flags and definitions") and added definitions for each MMU family. However we missed adding a definition for 64s, which was not really a bug because it's currently not used. But we'd like to use PAGE_KERNEL_ROX in a future patch so add a definition now. 
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331003845.216246-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 058601efbc8a3b..0c89977ec10bf0 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -116,6 +116,7 @@ */ #define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY) #define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_READ) +#define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC) #define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_RW | _PAGE_EXEC) /* From b56d55a5aa4aa9fc166595a7feb57f153ef7b555 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 31 Mar 2021 11:38:41 +1100 Subject: [PATCH 146/302] powerpc/pseries: Add key to flags in pSeries_lpar_hpte_updateboltedpp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flags argument to plpar_pte_protect() (aka. H_PROTECT), includes the key in bits 9-13, but currently we always set those bits to zero. In the past that hasn't been a problem because we always used key 0 for the kernel, and updateboltedpp() is only used for kernel mappings. However since commit d94b827e89dc ("powerpc/book3s64/kuap: Use Key 3 for kernel mapping with hash translation") we are now inadvertently changing the key (to zero) when we call plpar_pte_protect(). That hasn't broken anything because updateboltedpp() is only used for STRICT_KERNEL_RWX, which is currently disabled on 64s due to other bugs. But we want to fix that, so first we need to pass the key correctly to plpar_pte_protect(). We can't pass our newpp value directly in, we have to convert it into the form expected by the hcall. The hcall we're using here is H_PROTECT, which is specified in section 14.5.4.1.6 of LoPAPR v1.1. It takes a `flags` parameter, and the description for flags says: * flags: AVPN, pp0, pp1, pp2, key0-key4, n, and for the CMO option: CMO Option flags as defined in Table 189‚ If you then go to the start of the parent section, 14.5.4.1, on page 405, it says: Register Linkage (For hcall() tokens 0x04 - 0x18) * On Call * R3 function call token * R4 flags (see Table 178‚ “Page Frame Table Access flags field definition‚” on page 401) Then you have to go to section 14.5.3, and on page 394 there is a list of hcalls and their tokens (table 176), and there you can see that H_PROTECT == 0x18. Finally you can look at table 178, on page 401, where it specifies the layout of the bits for the key: Bit Function ----------------- 50-54 | key0-key4 Those are big-endian bit numbers, converting to normal bit numbers you get bits 9-13, or 0x3e00. In the kernel we have: #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) So the LO bits of newpp are already in the right place, and the HI bits need to be shifted down by 48. 
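To check the arithmetic with the kernel constants:

    (HPTE_R_KEY_HI >> 48) == (0x3000000000000000UL >> 48) == 0x3000   /* bits 12-13 */
     HPTE_R_KEY_LO                                         == 0x0e00   /* bits  9-11 */
     0x3000 | 0x0e00                                       == 0x3e00   /* key0-key4, bits 9-13 */

so the conversion into the hcall flags becomes:

    flags |= ((newpp & HPTE_R_KEY_HI) >> 48) | (newpp & HPTE_R_KEY_LO);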
Fixes: d94b827e89dc ("powerpc/book3s64/kuap: Use Key 3 for kernel mapping with hash translation") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331003845.216246-2-mpe@ellerman.id.au --- arch/powerpc/platforms/pseries/lpar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 764170fdb0f74a..8bbbddff7226df 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -976,11 +976,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, slot = pSeries_lpar_hpte_find(vpn, psize, ssize); BUG_ON(slot == -1); - flags = newpp & 7; + flags = newpp & (HPTE_R_PP | HPTE_R_N); if (mmu_has_feature(MMU_FTR_KERNEL_RO)) /* Move pp0 into bit 8 (IBM 55) */ flags |= (newpp & HPTE_R_PP0) >> 55; + flags |= ((newpp & HPTE_R_KEY_HI) >> 48) | (newpp & HPTE_R_KEY_LO); + lpar_rc = plpar_pte_protect(flags, slot, 0); BUG_ON(lpar_rc != H_SUCCESS); From 2c02e656a29d5f64193eb93da92781bcf0517146 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 31 Mar 2021 11:38:42 +1100 Subject: [PATCH 147/302] powerpc/64s: Use htab_convert_pte_flags() in hash__mark_rodata_ro() In hash__mark_rodata_ro() we pass the raw PP_RXXX value to hash__change_memory_range(). That has the effect of setting the key to zero, because PP_RXXX contains no key value. Fix it by using htab_convert_pte_flags(), which knows how to convert a pgprot into a pp value, including the key. Fixes: d94b827e89dc ("powerpc/book3s64/kuap: Use Key 3 for kernel mapping with hash translation") Signed-off-by: Michael Ellerman Reviewed-by: Daniel Axtens Link: https://lore.kernel.org/r/20210331003845.216246-3-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/hash_pgtable.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 567e0c6b3978e7..03819c259f0ab9 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -428,12 +428,14 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end, void hash__mark_rodata_ro(void) { - unsigned long start, end; + unsigned long start, end, pp; start = (unsigned long)_stext; end = (unsigned long)__init_begin; - WARN_ON(!hash__change_memory_range(start, end, PP_RXXX)); + pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY); + + WARN_ON(!hash__change_memory_range(start, end, pp)); } void hash__mark_initmem_nx(void) From 6f223ebe9c3f3ed315a06cec156086f1f7f7ded1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 31 Mar 2021 11:38:43 +1100 Subject: [PATCH 148/302] powerpc/mm/64s/hash: Factor out change_memory_range() Pull the loop calling hpte_updateboltedpp() out of hash__change_memory_range() into a helper function. We need it to be a separate function for the next patch. 
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331003845.216246-4-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/hash_pgtable.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 03819c259f0ab9..3663d3cdffacbc 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -400,10 +400,23 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_STRICT_KERNEL_RWX +static void change_memory_range(unsigned long start, unsigned long end, + unsigned int step, unsigned long newpp) +{ + unsigned long idx; + + pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", + start, end, newpp, step); + + for (idx = start; idx < end; idx += step) + /* Not sure if we can do much with the return value */ + mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, + mmu_kernel_ssize); +} + static bool hash__change_memory_range(unsigned long start, unsigned long end, unsigned long newpp) { - unsigned long idx; unsigned int step, shift; shift = mmu_psize_defs[mmu_linear_psize].shift; @@ -415,13 +428,7 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end, if (start >= end) return false; - pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", - start, end, newpp, step); - - for (idx = start; idx < end; idx += step) - /* Not sure if we can do much with the return value */ - mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, - mmu_kernel_ssize); + change_memory_range(start, end, step, newpp); return true; } From 87e65ad7bd3a84a992723753fcc23d31c2d063c2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 31 Mar 2021 11:38:44 +1100 Subject: [PATCH 149/302] powerpc/mm/64s/hash: Add real-mode change_memory_range() for hash LPAR When we enabled STRICT_KERNEL_RWX we received some reports of boot failures when using the Hash MMU and running under phyp. The crashes are intermittent, and often exhibit as a completely unresponsive system, or possibly an oops. One example, which was caught in xmon: [ 14.068327][ T1] devtmpfs: mounted [ 14.069302][ T1] Freeing unused kernel memory: 5568K [ 14.142060][ T347] BUG: Unable to handle kernel instruction fetch [ 14.142063][ T1] Run /sbin/init as init process [ 14.142074][ T347] Faulting instruction address: 0xc000000000004400 cpu 0x2: Vector: 400 (Instruction Access) at [c00000000c7475e0] pc: c000000000004400: exc_virt_0x4400_instruction_access+0x0/0x80 lr: c0000000001862d4: update_rq_clock+0x44/0x110 sp: c00000000c747880 msr: 8000000040001031 current = 0xc00000000c60d380 paca = 0xc00000001ec9de80 irqmask: 0x03 irq_happened: 0x01 pid = 347, comm = kworker/2:1 ... enter ? 
for help [c00000000c747880] c0000000001862d4 update_rq_clock+0x44/0x110 (unreliable) [c00000000c7478f0] c000000000198794 update_blocked_averages+0xb4/0x6d0 [c00000000c7479f0] c000000000198e40 update_nohz_stats+0x90/0xd0 [c00000000c747a20] c0000000001a13b4 _nohz_idle_balance+0x164/0x390 [c00000000c747b10] c0000000001a1af8 newidle_balance+0x478/0x610 [c00000000c747be0] c0000000001a1d48 pick_next_task_fair+0x58/0x480 [c00000000c747c40] c000000000eaab5c __schedule+0x12c/0x950 [c00000000c747cd0] c000000000eab3e8 schedule+0x68/0x120 [c00000000c747d00] c00000000016b730 worker_thread+0x130/0x640 [c00000000c747da0] c000000000174d50 kthread+0x1a0/0x1b0 [c00000000c747e10] c00000000000e0f0 ret_from_kernel_thread+0x5c/0x6c This shows that CPU 2, which was idle, woke up and then appears to randomly take an instruction fault on a completely valid area of kernel text. The cause turns out to be the call to hash__mark_rodata_ro(), late in boot. Due to the way we layout text and rodata, that function actually changes the permissions for all of text and rodata to read-only plus execute. To do the permission change we use a hypervisor call, H_PROTECT. On phyp that appears to be implemented by briefly removing the mapping of the kernel text, before putting it back with the updated permissions. If any other CPU is executing during that window, it will see spurious faults on the kernel text and/or data, leading to crashes. To fix it we use stop machine to collect all other CPUs, and then have them drop into real mode (MMU off), while we change the mapping. That way they are unaffected by the mapping temporarily disappearing. We don't see this bug on KVM because KVM always use VPM=1, where faults are directed to the hypervisor, and the fault will be serialised vs the h_protect() by HPTE_V_HVLOCK. 
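Condensed, the scheme looks roughly like this (the real code is in change_memory_range_fn() and chmem_secondary_loop() below):

    stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms, cpu_online_mask);

    /* master CPU */
    while (atomic_read(&parms->cpu_counter) > 1)    /* wait for the other CPUs to check in */
        barrier();
    change_memory_range(start, end, step, newpp);   /* do the H_PROTECT calls */
    mb();
    atomic_dec(&parms->cpu_counter);                /* release the secondaries */

    /* each secondary CPU: clear MSR_IR|MSR_DR to drop into real mode,
     * decrement cpu_counter, spin until it reaches zero, then restore
     * the saved MSR to return to virtual mode. */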
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331003845.216246-5-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/hash_pgtable.c | 105 +++++++++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 3663d3cdffacbc..ad5eff097d319b 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -400,6 +401,19 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_STRICT_KERNEL_RWX + +struct change_memory_parms { + unsigned long start, end, newpp; + unsigned int step, nr_cpus, master_cpu; + atomic_t cpu_counter; +}; + +// We'd rather this was on the stack but it has to be in the RMO +static struct change_memory_parms chmem_parms; + +// And therefore we need a lock to protect it from concurrent use +static DEFINE_MUTEX(chmem_lock); + static void change_memory_range(unsigned long start, unsigned long end, unsigned int step, unsigned long newpp) { @@ -414,6 +428,73 @@ static void change_memory_range(unsigned long start, unsigned long end, mmu_kernel_ssize); } +static int notrace chmem_secondary_loop(struct change_memory_parms *parms) +{ + unsigned long msr, tmp, flags; + int *p; + + p = &parms->cpu_counter.counter; + + local_irq_save(flags); + hard_irq_disable(); + + asm volatile ( + // Switch to real mode and leave interrupts off + "mfmsr %[msr] ;" + "li %[tmp], %[MSR_IR_DR] ;" + "andc %[tmp], %[msr], %[tmp] ;" + "mtmsrd %[tmp] ;" + + // Tell the master we are in real mode + "1: " + "lwarx %[tmp], 0, %[p] ;" + "addic %[tmp], %[tmp], -1 ;" + "stwcx. %[tmp], 0, %[p] ;" + "bne- 1b ;" + + // Spin until the counter goes to zero + "2: ;" + "lwz %[tmp], 0(%[p]) ;" + "cmpwi %[tmp], 0 ;" + "bne- 2b ;" + + // Switch back to virtual mode + "mtmsrd %[msr] ;" + + : // outputs + [msr] "=&r" (msr), [tmp] "=&b" (tmp), "+m" (*p) + : // inputs + [p] "b" (p), [MSR_IR_DR] "i" (MSR_IR | MSR_DR) + : // clobbers + "cc", "xer" + ); + + local_irq_restore(flags); + + return 0; +} + +static int change_memory_range_fn(void *data) +{ + struct change_memory_parms *parms = data; + + if (parms->master_cpu != smp_processor_id()) + return chmem_secondary_loop(parms); + + // Wait for all but one CPU (this one) to call-in + while (atomic_read(&parms->cpu_counter) > 1) + barrier(); + + change_memory_range(parms->start, parms->end, parms->step, parms->newpp); + + mb(); + + // Signal the other CPUs that we're done + atomic_dec(&parms->cpu_counter); + + return 0; +} + static bool hash__change_memory_range(unsigned long start, unsigned long end, unsigned long newpp) { @@ -428,7 +509,29 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end, if (start >= end) return false; - change_memory_range(start, end, step, newpp); + if (firmware_has_feature(FW_FEATURE_LPAR)) { + mutex_lock(&chmem_lock); + + chmem_parms.start = start; + chmem_parms.end = end; + chmem_parms.step = step; + chmem_parms.newpp = newpp; + chmem_parms.master_cpu = smp_processor_id(); + + cpus_read_lock(); + + atomic_set(&chmem_parms.cpu_counter, num_online_cpus()); + + // Ensure state is consistent before we call the other CPUs + mb(); + + stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms, + cpu_online_mask); + + cpus_read_unlock(); + mutex_unlock(&chmem_lock); + } else + change_memory_range(start, end, step, newpp); return true; } From 
bd573a81312fd9d6520b1cc81a88fd29e670e1ff Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 31 Mar 2021 11:38:45 +1100 Subject: [PATCH 150/302] powerpc/mm/64s: Allow STRICT_KERNEL_RWX again We have now fixed the known bugs in STRICT_KERNEL_RWX for Book3S 64-bit Hash and Radix MMUs, see preceding commits, so allow the option to be selected again. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331003845.216246-6-mpe@ellerman.id.au --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 048e2b2a5c638e..36d7c56df91d81 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -136,7 +136,7 @@ config PPC select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 - select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) + select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_COPY_MC if PPC64 From c6b4c9147f8b85d159f670d7bce71a93d16062a2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 26 Mar 2021 21:12:01 +1100 Subject: [PATCH 151/302] powerpc/64: Move security code into security.c When the original spectre/meltdown mitigations were merged we put them in setup_64.c for lack of a better place. Since then we created security.c for some of the other mitigation related code. But it should all be in there. This sort of code movement can cause trouble for backports, but hopefully this code is relatively stable these days (famous last words). Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210326101201.1973552-1-mpe@ellerman.id.au --- arch/powerpc/kernel/security.c | 261 ++++++++++++++++++++++++++++++++ arch/powerpc/kernel/setup_64.c | 264 --------------------------------- 2 files changed, 261 insertions(+), 264 deletions(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index e4e1a94ccf6a6f..287286ddf7dceb 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,7 @@ #include #include +#include "setup.h" u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; @@ -541,6 +543,178 @@ void setup_count_cache_flush(void) toggle_branch_cache_flush(enable); } +static enum l1d_flush_type enabled_flush_types; +static void *l1d_flush_fallback_area; +static bool no_rfi_flush; +static bool no_entry_flush; +static bool no_uaccess_flush; +bool rfi_flush; +static bool entry_flush; +static bool uaccess_flush; +DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); +EXPORT_SYMBOL(uaccess_flush_key); + +static int __init handle_no_rfi_flush(char *p) +{ + pr_info("rfi-flush: disabled on command line."); + no_rfi_flush = true; + return 0; +} +early_param("no_rfi_flush", handle_no_rfi_flush); + +static int __init handle_no_entry_flush(char *p) +{ + pr_info("entry-flush: disabled on command line."); + no_entry_flush = true; + return 0; +} +early_param("no_entry_flush", handle_no_entry_flush); + +static int __init handle_no_uaccess_flush(char *p) +{ + pr_info("uaccess-flush: disabled on command line."); + no_uaccess_flush = true; + return 0; +} +early_param("no_uaccess_flush", handle_no_uaccess_flush); + +/* + * The RFI flush is not KPTI, but because users will see doco that says to use + * nopti we 
hijack that option here to also disable the RFI flush. + */ +static int __init handle_no_pti(char *p) +{ + pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); + handle_no_rfi_flush(NULL); + return 0; +} +early_param("nopti", handle_no_pti); + +static void do_nothing(void *unused) +{ + /* + * We don't need to do the flush explicitly, just enter+exit kernel is + * sufficient, the RFI exit handlers will do the right thing. + */ +} + +void rfi_flush_enable(bool enable) +{ + if (enable) { + do_rfi_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else + do_rfi_flush_fixups(L1D_FLUSH_NONE); + + rfi_flush = enable; +} + +static void entry_flush_enable(bool enable) +{ + if (enable) { + do_entry_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else { + do_entry_flush_fixups(L1D_FLUSH_NONE); + } + + entry_flush = enable; +} + +static void uaccess_flush_enable(bool enable) +{ + if (enable) { + do_uaccess_flush_fixups(enabled_flush_types); + static_branch_enable(&uaccess_flush_key); + on_each_cpu(do_nothing, NULL, 1); + } else { + static_branch_disable(&uaccess_flush_key); + do_uaccess_flush_fixups(L1D_FLUSH_NONE); + } + + uaccess_flush = enable; +} + +static void __ref init_fallback_flush(void) +{ + u64 l1d_size, limit; + int cpu; + + /* Only allocate the fallback flush area once (at boot time). */ + if (l1d_flush_fallback_area) + return; + + l1d_size = ppc64_caches.l1d.size; + + /* + * If there is no d-cache-size property in the device tree, l1d_size + * could be zero. That leads to the loop in the asm wrapping around to + * 2^64-1, and then walking off the end of the fallback area and + * eventually causing a page fault which is fatal. Just default to + * something vaguely sane. + */ + if (!l1d_size) + l1d_size = (64 * 1024); + + limit = min(ppc64_bolted_size(), ppc64_rma_size); + + /* + * Align to L1d size, and size it at 2x L1d size, to catch possible + * hardware prefetch runoff. We don't have a recipe for load patterns to + * reliably avoid the prefetcher. 
+ */ + l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2, + l1d_size, MEMBLOCK_LOW_LIMIT, + limit, NUMA_NO_NODE); + if (!l1d_flush_fallback_area) + panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n", + __func__, l1d_size * 2, l1d_size, &limit); + + + for_each_possible_cpu(cpu) { + struct paca_struct *paca = paca_ptrs[cpu]; + paca->rfi_flush_fallback_area = l1d_flush_fallback_area; + paca->l1d_flush_size = l1d_size; + } +} + +void setup_rfi_flush(enum l1d_flush_type types, bool enable) +{ + if (types & L1D_FLUSH_FALLBACK) { + pr_info("rfi-flush: fallback displacement flush available\n"); + init_fallback_flush(); + } + + if (types & L1D_FLUSH_ORI) + pr_info("rfi-flush: ori type flush available\n"); + + if (types & L1D_FLUSH_MTTRIG) + pr_info("rfi-flush: mttrig type flush available\n"); + + enabled_flush_types = types; + + if (!cpu_mitigations_off() && !no_rfi_flush) + rfi_flush_enable(enable); +} + +void setup_entry_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_entry_flush) + entry_flush_enable(enable); +} + +void setup_uaccess_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_uaccess_flush) + uaccess_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int count_cache_flush_set(void *data, u64 val) { @@ -579,5 +753,92 @@ static __init int count_cache_flush_debugfs_init(void) return 0; } device_initcall(count_cache_flush_debugfs_init); + +static int rfi_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != rfi_flush) + rfi_flush_enable(enable); + + return 0; +} + +static int rfi_flush_get(void *data, u64 *val) +{ + *val = rfi_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); + +static int entry_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != entry_flush) + entry_flush_enable(enable); + + return 0; +} + +static int entry_flush_get(void *data, u64 *val) +{ + *val = entry_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); + +static int uaccess_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != uaccess_flush) + uaccess_flush_enable(enable); + + return 0; +} + +static int uaccess_flush_get(void *data, u64 *val) +{ + *val = uaccess_flush ? 
1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); + +static __init int rfi_flush_debugfs_init(void) +{ + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); + debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); + return 0; +} +device_initcall(rfi_flush_debugfs_init); #endif /* CONFIG_DEBUG_FS */ #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 04a31586f76071..ccbfcc88758ca6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -942,266 +941,3 @@ static int __init disable_hardlockup_detector(void) return 0; } early_initcall(disable_hardlockup_detector); - -#ifdef CONFIG_PPC_BOOK3S_64 -static enum l1d_flush_type enabled_flush_types; -static void *l1d_flush_fallback_area; -static bool no_rfi_flush; -static bool no_entry_flush; -static bool no_uaccess_flush; -bool rfi_flush; -static bool entry_flush; -static bool uaccess_flush; -DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); -EXPORT_SYMBOL(uaccess_flush_key); - -static int __init handle_no_rfi_flush(char *p) -{ - pr_info("rfi-flush: disabled on command line."); - no_rfi_flush = true; - return 0; -} -early_param("no_rfi_flush", handle_no_rfi_flush); - -static int __init handle_no_entry_flush(char *p) -{ - pr_info("entry-flush: disabled on command line."); - no_entry_flush = true; - return 0; -} -early_param("no_entry_flush", handle_no_entry_flush); - -static int __init handle_no_uaccess_flush(char *p) -{ - pr_info("uaccess-flush: disabled on command line."); - no_uaccess_flush = true; - return 0; -} -early_param("no_uaccess_flush", handle_no_uaccess_flush); - -/* - * The RFI flush is not KPTI, but because users will see doco that says to use - * nopti we hijack that option here to also disable the RFI flush. - */ -static int __init handle_no_pti(char *p) -{ - pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); - handle_no_rfi_flush(NULL); - return 0; -} -early_param("nopti", handle_no_pti); - -static void do_nothing(void *unused) -{ - /* - * We don't need to do the flush explicitly, just enter+exit kernel is - * sufficient, the RFI exit handlers will do the right thing. - */ -} - -void rfi_flush_enable(bool enable) -{ - if (enable) { - do_rfi_flush_fixups(enabled_flush_types); - on_each_cpu(do_nothing, NULL, 1); - } else - do_rfi_flush_fixups(L1D_FLUSH_NONE); - - rfi_flush = enable; -} - -static void entry_flush_enable(bool enable) -{ - if (enable) { - do_entry_flush_fixups(enabled_flush_types); - on_each_cpu(do_nothing, NULL, 1); - } else { - do_entry_flush_fixups(L1D_FLUSH_NONE); - } - - entry_flush = enable; -} - -static void uaccess_flush_enable(bool enable) -{ - if (enable) { - do_uaccess_flush_fixups(enabled_flush_types); - static_branch_enable(&uaccess_flush_key); - on_each_cpu(do_nothing, NULL, 1); - } else { - static_branch_disable(&uaccess_flush_key); - do_uaccess_flush_fixups(L1D_FLUSH_NONE); - } - - uaccess_flush = enable; -} - -static void __ref init_fallback_flush(void) -{ - u64 l1d_size, limit; - int cpu; - - /* Only allocate the fallback flush area once (at boot time). 
*/ - if (l1d_flush_fallback_area) - return; - - l1d_size = ppc64_caches.l1d.size; - - /* - * If there is no d-cache-size property in the device tree, l1d_size - * could be zero. That leads to the loop in the asm wrapping around to - * 2^64-1, and then walking off the end of the fallback area and - * eventually causing a page fault which is fatal. Just default to - * something vaguely sane. - */ - if (!l1d_size) - l1d_size = (64 * 1024); - - limit = min(ppc64_bolted_size(), ppc64_rma_size); - - /* - * Align to L1d size, and size it at 2x L1d size, to catch possible - * hardware prefetch runoff. We don't have a recipe for load patterns to - * reliably avoid the prefetcher. - */ - l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2, - l1d_size, MEMBLOCK_LOW_LIMIT, - limit, NUMA_NO_NODE); - if (!l1d_flush_fallback_area) - panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n", - __func__, l1d_size * 2, l1d_size, &limit); - - - for_each_possible_cpu(cpu) { - struct paca_struct *paca = paca_ptrs[cpu]; - paca->rfi_flush_fallback_area = l1d_flush_fallback_area; - paca->l1d_flush_size = l1d_size; - } -} - -void setup_rfi_flush(enum l1d_flush_type types, bool enable) -{ - if (types & L1D_FLUSH_FALLBACK) { - pr_info("rfi-flush: fallback displacement flush available\n"); - init_fallback_flush(); - } - - if (types & L1D_FLUSH_ORI) - pr_info("rfi-flush: ori type flush available\n"); - - if (types & L1D_FLUSH_MTTRIG) - pr_info("rfi-flush: mttrig type flush available\n"); - - enabled_flush_types = types; - - if (!cpu_mitigations_off() && !no_rfi_flush) - rfi_flush_enable(enable); -} - -void setup_entry_flush(bool enable) -{ - if (cpu_mitigations_off()) - return; - - if (!no_entry_flush) - entry_flush_enable(enable); -} - -void setup_uaccess_flush(bool enable) -{ - if (cpu_mitigations_off()) - return; - - if (!no_uaccess_flush) - uaccess_flush_enable(enable); -} - -#ifdef CONFIG_DEBUG_FS -static int rfi_flush_set(void *data, u64 val) -{ - bool enable; - - if (val == 1) - enable = true; - else if (val == 0) - enable = false; - else - return -EINVAL; - - /* Only do anything if we're changing state */ - if (enable != rfi_flush) - rfi_flush_enable(enable); - - return 0; -} - -static int rfi_flush_get(void *data, u64 *val) -{ - *val = rfi_flush ? 1 : 0; - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); - -static int entry_flush_set(void *data, u64 val) -{ - bool enable; - - if (val == 1) - enable = true; - else if (val == 0) - enable = false; - else - return -EINVAL; - - /* Only do anything if we're changing state */ - if (enable != entry_flush) - entry_flush_enable(enable); - - return 0; -} - -static int entry_flush_get(void *data, u64 *val) -{ - *val = entry_flush ? 1 : 0; - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); - -static int uaccess_flush_set(void *data, u64 val) -{ - bool enable; - - if (val == 1) - enable = true; - else if (val == 0) - enable = false; - else - return -EINVAL; - - /* Only do anything if we're changing state */ - if (enable != uaccess_flush) - uaccess_flush_enable(enable); - - return 0; -} - -static int uaccess_flush_get(void *data, u64 *val) -{ - *val = uaccess_flush ? 
1 : 0; - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); - -static __init int rfi_flush_debugfs_init(void) -{ - debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); - debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); - debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); - return 0; -} -device_initcall(rfi_flush_debugfs_init); -#endif -#endif /* CONFIG_PPC_BOOK3S_64 */ From acd4dfeb49c8ec1071b1e67683c5779e97fdc5b9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 15 Mar 2021 14:41:59 +1100 Subject: [PATCH 152/302] powerpc/kexec: Don't use .machine ppc64 in trampoline_64.S As best as I can tell the ".machine" directive in trampoline_64.S is no longer, or never was, necessary. It was added in commit 0d97631392c2 ("powerpc: Add purgatory for kexec_file_load() implementation."), which created the file based on the kexec-tools purgatory. It may be/have-been necessary in the kexec-tools version, but we have a completely different build system, and we already pass the desired CPU flags, eg: gcc ... -m64 -Wl,-a64 -mabi=elfv2 -Wa,-maltivec -Wa,-mpower4 -Wa,-many ... arch/powerpc/purgatory/trampoline_64.S So drop the ".machine" directive and rely on the assembler flags. Reported-by: Daniel Axtens Signed-off-by: Michael Ellerman Reviewed-by: Segher Boessenkool Link: https://lore.kernel.org/r/20210315034159.315675-1-mpe@ellerman.id.au --- arch/powerpc/purgatory/trampoline_64.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/purgatory/trampoline_64.S b/arch/powerpc/purgatory/trampoline_64.S index d956b8a35fd137..b35837c138526c 100644 --- a/arch/powerpc/purgatory/trampoline_64.S +++ b/arch/powerpc/purgatory/trampoline_64.S @@ -12,7 +12,6 @@ #include #include - .machine ppc64 .balign 256 .globl purgatory_start purgatory_start: From 08a022ad3dfafc7e33d4529015e14bb75179cacc Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Thu, 25 Feb 2021 14:21:06 +1100 Subject: [PATCH 153/302] powerpc/powernv/memtrace: Allow mmaping trace buffers Let the memory removed from the linear mapping to be used for the trace buffers be mmaped. This is a useful way of providing cache-inhibited memory for the alignment_handler selftest. 
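A userspace consumer can then map a trace buffer along these lines (sketch only; the debugfs path assumes the standard mount point, a node directory such as 00000000, and that buffers were first allocated via the memtrace 'enable' file):

    #include <fcntl.h>
    #include <sys/mman.h>

    size_t length = /* value read from the adjacent 'size' file */;
    int fd = open("/sys/kernel/debug/powerpc/memtrace/00000000/trace", O_RDWR);
    /* length must not exceed the buffer size, otherwise the new
     * mmap handler returns -EINVAL */
    void *ci_mem = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);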
Signed-off-by: Jordan Niethe [mpe: make memtrace_mmap() static as noticed by lkp@intel.com] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210225032108.1458352-1-jniethe5@gmail.com --- arch/powerpc/platforms/powernv/memtrace.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 019669eb21d271..71c1262589feaf 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -46,10 +46,26 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); } +static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct memtrace_entry *ent = filp->private_data; + + if (ent->size < vma->vm_end - vma->vm_start) + return -EINVAL; + + if (vma->vm_pgoff << PAGE_SHIFT >= ent->size) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + return remap_pfn_range(vma, vma->vm_start, PHYS_PFN(ent->start) + vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot); +} + static const struct file_operations memtrace_fops = { .llseek = default_llseek, .read = memtrace_read, .open = simple_open, + .mmap = memtrace_mmap, }; #define FLUSH_CHUNK_SIZE SZ_1G @@ -187,7 +203,7 @@ static int memtrace_init_debugfs(void) dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); ent->dir = dir; - debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); + debugfs_create_file_unsafe("trace", 0600, dir, ent, &memtrace_fops); debugfs_create_x64("start", 0400, dir, &ent->start); debugfs_create_x64("size", 0400, dir, &ent->size); } From 812aa68ef7d4d71bed996468ead665092a3f8de9 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Thu, 25 Feb 2021 14:21:07 +1100 Subject: [PATCH 154/302] selftests/powerpc: Suggest memtrace instead of /dev/mem for ci memory The suggested alternative for getting cache-inhibited memory with 'mem=' and /dev/mem is pretty hacky. Also, PAPR guests do not allow system memory to be mapped cache-inhibited so despite /dev/mem being available this will not work which can cause confusion. Instead recommend using the memtrace buffers. memtrace is only available on powernv so there will not be any chance of trying to do this in a guest. Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210225032108.1458352-2-jniethe5@gmail.com --- .../selftests/powerpc/alignment/alignment_handler.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index c25cf7cd45e9fd..33ee34fc0828a1 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -10,16 +10,7 @@ * * We create two sets of source and destination buffers, one in regular memory, * the other cache-inhibited (by default we use /dev/fb0 for this, but an - * alterative path for cache-inhibited memory may be provided). - * - * One way to get cache-inhibited memory is to use the "mem" kernel parameter - * to limit the kernel to less memory than actually exists. Addresses above - * the limit may still be accessed but will be treated as cache-inhibited. 
For - * example, if there is actually 4GB of memory and the parameter "mem=3GB" is - * used, memory from address 0xC0000000 onwards is treated as cache-inhibited. - * To access this region /dev/mem is used. The kernel should be configured - * without CONFIG_STRICT_DEVMEM. In this case use: - * ./alignment_handler /dev/mem 0xc0000000 + * alterative path for cache-inhibited memory may be provided, e.g. memtrace). * * We initialise the source buffers, then use whichever set of load/store * instructions is under test to copy bytes from the source buffers to the From 10f8f96179ecc7f69c927f6d231f6d02736cea83 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Tue, 6 Apr 2021 12:16:01 -0400 Subject: [PATCH 155/302] powerpc/perf: Fix PMU constraint check for EBB events The power PMU group constraints includes check for EBB events to make sure all events in a group must agree on EBB. This will prevent scheduling EBB and non-EBB events together. But in the existing check, settings for constraint mask and value is interchanged. Patch fixes the same. Before the patch, PMU selftest "cpu_event_pinned_vs_ebb_test" fails with below in dmesg logs. This happens because EBB event gets enabled along with a non-EBB cpu event. [35600.453346] cpu_event_pinne[41326]: illegal instruction (4) at 10004a18 nip 10004a18 lr 100049f8 code 1 in cpu_event_pinned_vs_ebb_test[10000000+10000] Test results after the patch: $ ./pmu/ebb/cpu_event_pinned_vs_ebb_test test: cpu_event_pinned_vs_ebb tags: git_version:v5.12-rc5-93-gf28c3125acd3-dirty Binding to cpu 8 EBB Handler is at 0x100050c8 read error on event 0x7fffe6bd4040! PM_RUN_INST_CMPL: result 9872 running/enabled 37930432 success: cpu_event_pinned_vs_ebb This bug was hidden by other logic until commit 1908dc911792 (perf: Tweak perf_event_attr::exclusive semantics). Fixes: 4df489991182 ("powerpc/perf: Add power8 EBB support") Reported-by: Thadeu Lima de Souza Cascardo Signed-off-by: Athira Rajeev [mpe: Mention commit 1908dc911792] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1617725761-1464-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/isa207-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index e4f577da33d8b1..8b5eeb6fb2fb3b 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -447,8 +447,8 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, * EBB events are pinned & exclusive, so this should never actually * hit, but we leave it as a fallback in case. */ - mask |= CNST_EBB_VAL(ebb); - value |= CNST_EBB_MASK; + mask |= CNST_EBB_MASK; + value |= CNST_EBB_VAL(ebb); *maskp = mask; *valp = value; From 98db179a78dd8379e9d2cbfc3f00224168a9344c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 6 Apr 2021 12:55:08 +1000 Subject: [PATCH 156/302] powerpc/64s: power4 nap fixup in C There is no need for this to be in asm, use the new intrrupt entry wrapper. 
Signed-off-by: Nicholas Piggin Tested-by: Andreas Schwab Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210406025508.821718-1-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 24 ++++++++++++++ arch/powerpc/include/asm/processor.h | 1 + arch/powerpc/include/asm/thread_info.h | 6 ++++ arch/powerpc/kernel/exceptions-64s.S | 45 -------------------------- arch/powerpc/kernel/idle_book3s.S | 4 +++ 5 files changed, 35 insertions(+), 45 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 7c633896d758cd..05e7fc4ffb50d2 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -9,6 +9,17 @@ #include #include +static inline void nap_adjust_return(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_970_NAP + if (unlikely(test_thread_local_flags(_TLF_NAPPING))) { + /* Can avoid a test-and-clear because NMIs do not call this */ + clear_thread_local_flags(_TLF_NAPPING); + regs->nip = (unsigned long)power4_idle_nap_return; + } +#endif +} + struct interrupt_state { #ifdef CONFIG_PPC_BOOK3E_64 enum ctx_state ctx_state; @@ -124,6 +135,14 @@ static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct in static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { + /* + * Adjust at exit so the main handler sees the true NIA. This must + * come before irq_exit() because irq_exit can enable interrupts, and + * if another interrupt is taken before nap_adjust_return has run + * here, then that interrupt would return directly to idle nap return. + */ + nap_adjust_return(regs); + irq_exit(); interrupt_exit_prepare(regs, state); } @@ -179,6 +198,11 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter radix_enabled() || (mfmsr() & MSR_DR)) nmi_exit(); + /* + * nmi does not call nap_adjust_return because nmi should not create + * new work to do (must use irq_work for that). 
+ */ + #ifdef CONFIG_PPC64 if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) this_cpu_set_ftrace_enabled(state->ftrace_enabled); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index ad36e852157789..7bf8a15af22469 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -389,6 +389,7 @@ extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val); extern unsigned long isa206_idle_insn_mayloss(unsigned long type); #ifdef CONFIG_PPC_970_NAP extern void power4_idle_nap(void); +void power4_idle_nap_return(void); #endif extern unsigned long cpuidle_disable; diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 9d6402402b9bad..b4ec6c7dd72ee9 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -151,6 +151,12 @@ void arch_setup_new_exec(void); #ifndef __ASSEMBLY__ +static inline void clear_thread_local_flags(unsigned int flags) +{ + struct thread_info *ti = current_thread_info(); + ti->local_flags &= ~flags; +} + static inline bool test_thread_local_flags(unsigned int flags) { struct thread_info *ti = current_thread_info(); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 8082b690e87468..0cdb59e8b5773e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -692,25 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r1,GPR1(r1) .endm -/* - * When the idle code in power4_idle puts the CPU into NAP mode, - * it has to do so in a loop, and relies on the external interrupt - * and decrementer interrupt entry code to get it out of the loop. - * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags - * to signal that it is in the loop and needs help to get out. - */ -#ifdef CONFIG_PPC_970_NAP -#define FINISH_NAP \ -BEGIN_FTR_SECTION \ - ld r11, PACA_THREAD_INFO(r13); \ - ld r9,TI_LOCAL_FLAGS(r11); \ - andi. r10,r9,_TLF_NAPPING; \ - bnel power4_fixup_nap; \ -END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) -#else -#define FINISH_NAP -#endif - /* * There are a few constraints to be concerned with. * - Real mode exceptions code/data must be located at their physical location. 
@@ -1248,7 +1229,6 @@ EXC_COMMON_BEGIN(machine_check_common) */ GEN_COMMON machine_check - FINISH_NAP /* Enable MSR_RI when finished with PACA_EXMC */ li r10,MSR_RI mtmsrd r10,1 @@ -1571,7 +1551,6 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ b interrupt_return @@ -1801,7 +1780,6 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) EXC_VIRT_END(decrementer, 0x4900, 0x80) EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl timer_interrupt b interrupt_return @@ -1886,7 +1864,6 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100) EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception @@ -2237,7 +2214,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl handle_hmi_exception b interrupt_return @@ -2266,7 +2242,6 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20) EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception @@ -2299,7 +2274,6 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20) EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ b interrupt_return @@ -2345,7 +2319,6 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20) EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception b interrupt_return @@ -3096,24 +3069,6 @@ USE_FIXED_SECTION(virt_trampolines) __end_interrupts: DEFINE_FIXED_SYMBOL(__end_interrupts) -#ifdef CONFIG_PPC_970_NAP - /* - * Called by exception entry code if _TLF_NAPPING was set, this clears - * the NAPPING flag, and redirects the exception exit to - * power4_fixup_nap_return. 
- */ - .globl power4_fixup_nap -EXC_COMMON_BEGIN(power4_fixup_nap) - andc r9,r9,r10 - std r9,TI_LOCAL_FLAGS(r11) - LOAD_REG_ADDR(r10, power4_idle_nap_return) - std r10,_NIP(r1) - blr - -power4_idle_nap_return: - blr -#endif - CLOSE_FIXED_SECTION(real_vectors); CLOSE_FIXED_SECTION(real_trampolines); CLOSE_FIXED_SECTION(virt_vectors); diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index f9e6d83e67207d..abb719b21cae72 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -209,4 +209,8 @@ _GLOBAL(power4_idle_nap) mtmsrd r7 isync b 1b + + .globl power4_idle_nap_return +power4_idle_nap_return: + blr #endif From 01ed0510941ae1350c501977132bdb54630614e2 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 6 Apr 2021 09:33:05 +0800 Subject: [PATCH 157/302] powerpc/pseries: remove unneeded semicolon Eliminate the following coccicheck warning: ./arch/powerpc/platforms/pseries/lpar.c:1633:2-3: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1617672785-81372-1-git-send-email-yang.lee@linux.alibaba.com --- arch/powerpc/platforms/pseries/lpar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 8bbbddff7226df..f257a892489c66 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1631,7 +1631,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift) } msleep(delay); rc = plpar_resize_hpt_prepare(0, shift); - }; + } switch (rc) { case H_SUCCESS: From b27dadecdf9102838331b9a0b41ffc1cfe288154 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 4 Apr 2021 12:26:23 -0700 Subject: [PATCH 158/302] powerpc: iommu: fix build when neither PCI or IBMVIO is set When neither CONFIG_PCI nor CONFIG_IBMVIO is set/enabled, iommu.c has a build error. The fault injection code is not useful in that kernel config, so make the FAIL_IOMMU option depend on PCI || IBMVIO. Prevents this build error (warning escalated to error): ../arch/powerpc/kernel/iommu.c:178:30: error: 'fail_iommu_bus_notifier' defined but not used [-Werror=unused-variable] 178 | static struct notifier_block fail_iommu_bus_notifier = { Fixes: d6b9a81b2a45 ("powerpc: IOMMU fault injection") Reported-by: kernel test robot Suggested-by: Michael Ellerman Signed-off-by: Randy Dunlap Acked-by: Randy Dunlap # build-tested Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210404192623.10697-1-rdunlap@infradead.org --- arch/powerpc/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index ae084357994e87..6342f9da454551 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -353,6 +353,7 @@ config PPC_EARLY_DEBUG_CPM_ADDR config FAIL_IOMMU bool "Fault-injection capability for IOMMU" depends on FAULT_INJECTION + depends on PCI || IBMVIO help Provide fault-injection capability for IOMMU. Each device can be selectively enabled via the fail_iommu property. From c46bbf5d2defae50d61ddf31502017ee8952af83 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 5 Apr 2021 09:57:27 +0000 Subject: [PATCH 159/302] powerpc/32: Remove powerpc specific definition of 'ptrdiff_t' For unknown reason, old commit d27dfd388715 ("Import pre2.0.8") changed 'ptrdiff_t' from 'int' to 'long'. 
GCC expects it as 'int' really, and this leads to the following warning when building KFENCE: CC mm/kfence/report.o In file included from ./include/linux/printk.h:7, from ./include/linux/kernel.h:16, from mm/kfence/report.c:10: mm/kfence/report.c: In function 'kfence_report_error': ./include/linux/kern_levels.h:5:18: warning: format '%td' expects argument of type 'ptrdiff_t', but argument 6 has type 'long int' [-Wformat=] 5 | #define KERN_SOH "\001" /* ASCII Start Of Header */ | ^~~~~~ ./include/linux/kern_levels.h:11:18: note: in expansion of macro 'KERN_SOH' 11 | #define KERN_ERR KERN_SOH "3" /* error conditions */ | ^~~~~~~~ ./include/linux/printk.h:343:9: note: in expansion of macro 'KERN_ERR' 343 | printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) | ^~~~~~~~ mm/kfence/report.c:213:3: note: in expansion of macro 'pr_err' 213 | pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%td):\n", | ^~~~~~ defines it as 'int', and defines 'size_t' and 'ssize_t' exactly as powerpc do, so remove the powerpc specific definitions and fallback on generic ones. Signed-off-by: Christophe Leroy Acked-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e43d133bf52fa19e577f64f3a3a38cedc570377d.1617616601.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/uapi/asm/posix_types.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/powerpc/include/uapi/asm/posix_types.h b/arch/powerpc/include/uapi/asm/posix_types.h index f698400e4bb017..9c03423125446c 100644 --- a/arch/powerpc/include/uapi/asm/posix_types.h +++ b/arch/powerpc/include/uapi/asm/posix_types.h @@ -12,11 +12,6 @@ typedef unsigned long __kernel_old_dev_t; #define __kernel_old_dev_t __kernel_old_dev_t #else -typedef unsigned int __kernel_size_t; -typedef int __kernel_ssize_t; -typedef long __kernel_ptrdiff_t; -#define __kernel_size_t __kernel_size_t - typedef short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t #endif From 5088eb4092df12d701af8e0e92860b7186365279 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Apr 2021 11:48:34 +1000 Subject: [PATCH 160/302] KVM: PPC: Book3S HV P9: Restore host CTRL SPR after guest exit The host CTRL (runlatch) value is not restored after guest exit. The host CTRL should always be 1 except in CPU idle code, so this can result in the host running with runlatch clear, and potentially switching to a different vCPU which then runs with runlatch clear as well. This has little effect on P9 machines, CTRL is only responsible for some PMU counter logic in the host and so other than corner cases of software relying on that, or explicitly reading the runlatch value (Linux does not appear to be affected but it's possible non-Linux guests could be), there should be no execution correctness problem, though it could be used as a covert channel between guests. There may be microcontrollers, firmware or monitoring tools that sample the runlatch value out-of-band, however since the register is writable by guests, these values would (should) not be relied upon for correct operation of the host, so suboptimal performance or incorrect reporting should be the worst problem. 
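For context, outside of KVM the host drives the runlatch with small helpers following this pattern (a simplified sketch of the existing ppc64_runlatch_on()/ppc64_runlatch_off() behaviour; the helper name below is illustrative and not part of this patch):

	/* CTRL[RUN] (the runlatch) hints that the thread is doing useful work. */
	static inline void runlatch_set(bool on)	/* illustrative helper only */
	{
		unsigned long ctrl = mfspr(SPRN_CTRLF);	/* read current CTRL */

		if (on)
			ctrl |= CTRL_RUNLATCH;		/* normal host state: 1 */
		else
			ctrl &= ~CTRL_RUNLATCH;		/* only the idle loop clears it */
		mtspr(SPRN_CTRLT, ctrl);
	}

The change below applies the same idea on the guest-exit path: the guest's CTRL value is saved for a later restore, and the runlatch is forced back to 1 for the host.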
Fixes: 95a6432ce9038 ("KVM: PPC: Book3S HV: Streamlined guest entry/exit path on P9 for radix guests") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412014845.1517916-2-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 13bad6bf4c9589..208a053c9adfde 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3728,7 +3728,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.dec_expires = dec + tb; vcpu->cpu = -1; vcpu->arch.thread_cpu = -1; + /* Save guest CTRL register, set runlatch to 1 */ vcpu->arch.ctrl = mfspr(SPRN_CTRLF); + if (!(vcpu->arch.ctrl & 1)) + mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1); vcpu->arch.iamr = mfspr(SPRN_IAMR); vcpu->arch.pspb = mfspr(SPRN_PSPB); From a19b70abc69aea8ea5974c57e1c3457d9df6aff2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Apr 2021 11:48:35 +1000 Subject: [PATCH 161/302] KVM: PPC: Book3S HV: Nested move LPCR sanitising to sanitise_hv_regs This will get a bit more complicated in future patches. Move it into the helper function. This change allows the L1 hypervisor to determine some of the LPCR bits that the L0 is using to run it, which could be a privilege violation (LPCR is HV-privileged), although the same problem exists now for HFSCR for example. Discussion of the HV privilege issue is ongoing and can be resolved with a later change. Signed-off-by: Nicholas Piggin Reviewed-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412014845.1517916-3-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_nested.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 0cd0e7aad588b2..3060e5deffc8d3 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -132,8 +132,27 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, } } +/* + * This can result in some L0 HV register state being leaked to an L1 + * hypervisor when the hv_guest_state is copied back to the guest after + * being modified here. + * + * There is no known problem with such a leak, and in many cases these + * register settings could be derived by the guest by observing behaviour + * and timing, interrupts, etc., but it is an issue to consider. + */ static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) { + struct kvmppc_vcore *vc = vcpu->arch.vcore; + u64 mask; + + /* + * Don't let L1 change LPCR bits for the L2 except these: + */ + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | + LPCR_LPES | LPCR_MER; + hr->lpcr = (vc->lpcr & ~mask) | (hr->lpcr & mask); + /* * Don't let L1 enable features for L2 which we've disabled for L1, * but preserve the interrupt cause field. 
@@ -271,8 +290,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) u64 hv_ptr, regs_ptr; u64 hdec_exp; s64 delta_purr, delta_spurr, delta_ic, delta_vtb; - u64 mask; - unsigned long lpcr; if (vcpu->kvm->arch.l1_ptcr == 0) return H_NOT_AVAILABLE; @@ -321,9 +338,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token; vcpu->arch.regs = l2_regs; vcpu->arch.shregs.msr = vcpu->arch.regs.msr; - mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | - LPCR_LPES | LPCR_MER; - lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask); + sanitise_hv_regs(vcpu, &l2_hv); restore_hv_regs(vcpu, &l2_hv); @@ -335,7 +350,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) r = RESUME_HOST; break; } - r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr); + r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr); } while (is_kvmppc_resume_guest(r)); /* save L2 state for return */ From 67145ef4960f55923b9e404c0b184944bfeded4d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Apr 2021 11:48:36 +1000 Subject: [PATCH 162/302] KVM: PPC: Book3S HV: Add a function to filter guest LPCR bits Guest LPCR depends on hardware type, and future changes will add restrictions based on errata and guest MMU mode. Move this logic to a common function and use it for the cases where the guest wants to update its LPCR (or the LPCR of a nested guest). This also adds a warning in other places that set or update LPCR if we try to set something that would have been disallowed by the filter, as a sanity check. Signed-off-by: Nicholas Piggin Reviewed-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412014845.1517916-4-npiggin@gmail.com --- arch/powerpc/include/asm/kvm_book3s.h | 2 + arch/powerpc/kvm/book3s_hv.c | 68 ++++++++++++++++++++------- arch/powerpc/kvm/book3s_hv_nested.c | 8 +++- 3 files changed, 59 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 2f5f919f6cd302..c581215081570a 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -258,6 +258,8 @@ extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm, extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa, struct kvm_memory_slot *memslot, unsigned long *map); +extern unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, + unsigned long lpcr); extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask); extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 208a053c9adfde..268e31c7e49cf0 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1635,6 +1635,35 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, return 0; } +/* + * Enforce limits on guest LPCR values based on hardware availability, + * guest configuration, and possibly hypervisor support and security + * concerns. + */ +unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr) +{ + /* On POWER8 and above, userspace can modify AIL */ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + lpcr &= ~LPCR_AIL; + + /* + * On POWER9, allow userspace to enable large decrementer for the + * guest, whether or not the host has it enabled. 
+ */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + lpcr &= ~LPCR_LD; + + return lpcr; +} + +static void verify_lpcr(struct kvm *kvm, unsigned long lpcr) +{ + if (lpcr != kvmppc_filter_lpcr_hv(kvm, lpcr)) { + WARN_ONCE(1, "lpcr 0x%lx differs from filtered 0x%lx\n", + lpcr, kvmppc_filter_lpcr_hv(kvm, lpcr)); + } +} + static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, bool preserve_top32) { @@ -1643,6 +1672,23 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, u64 mask; spin_lock(&vc->lock); + + /* + * Userspace can only modify + * DPFD (default prefetch depth), ILE (interrupt little-endian), + * TC (translation control), AIL (alternate interrupt location), + * LD (large decrementer). + * These are subject to restrictions from kvmppc_filter_lcpr_hv(). + */ + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD; + + /* Broken 32-bit version of LPCR must not clear top bits */ + if (preserve_top32) + mask &= 0xFFFFFFFF; + + new_lpcr = kvmppc_filter_lpcr_hv(kvm, + (vc->lpcr & ~mask) | (new_lpcr & mask)); + /* * If ILE (interrupt little-endian) has changed, update the * MSR_LE bit in the intr_msr for each vcpu in this vcore. @@ -1661,25 +1707,8 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, } } - /* - * Userspace can only modify DPFD (default prefetch depth), - * ILE (interrupt little-endian) and TC (translation control). - * On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.). - */ - mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; - if (cpu_has_feature(CPU_FTR_ARCH_207S)) - mask |= LPCR_AIL; - /* - * On POWER9, allow userspace to enable large decrementer for the - * guest, whether or not the host has it enabled. - */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - mask |= LPCR_LD; + vc->lpcr = new_lpcr; - /* Broken 32-bit version of LPCR must not clear top bits */ - if (preserve_top32) - mask &= 0xFFFFFFFF; - vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); } @@ -4644,8 +4673,10 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) struct kvmppc_vcore *vc = kvm->arch.vcores[i]; if (!vc) continue; + spin_lock(&vc->lock); vc->lpcr = (vc->lpcr & ~mask) | lpcr; + verify_lpcr(kvm, vc->lpcr); spin_unlock(&vc->lock); if (++cores_done >= kvm->arch.online_vcores) break; @@ -4973,6 +5004,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvmppc_setup_partition_table(kvm); } + verify_lpcr(kvm, lpcr); kvm->arch.lpcr = lpcr; /* Initialization for future HPT resizes */ diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 3060e5deffc8d3..d14fe32f167b82 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -151,7 +151,13 @@ static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) */ mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | LPCR_LPES | LPCR_MER; - hr->lpcr = (vc->lpcr & ~mask) | (hr->lpcr & mask); + + /* + * Additional filtering is required depending on hardware + * and configuration. + */ + hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm, + (vc->lpcr & ~mask) | (hr->lpcr & mask)); /* * Don't let L1 enable features for L2 which we've disabled for L1, From bcc92a0d6d6eae1e7b34a88f58ae69c081d85f97 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Apr 2021 11:48:37 +1000 Subject: [PATCH 163/302] KVM: PPC: Book3S HV: Disallow LPCR[AIL] to be set to 1 or 2 These are already disallowed by H_SET_MODE from the guest, also disallow these by updating LPCR directly. 
AIL modes can affect the host interrupt behaviour while the guest LPCR value is set, so filter it here too. Suggested-by: Fabiano Rosas Signed-off-by: Nicholas Piggin Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412014845.1517916-5-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 268e31c7e49cf0..3de8a1f89a7db6 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -803,7 +803,10 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, vcpu->arch.dawrx1 = value2; return H_SUCCESS; case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: - /* KVM does not support mflags=2 (AIL=2) */ + /* + * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved. + * Keep this in synch with kvmppc_filter_guest_lpcr_hv. + */ if (mflags != 0 && mflags != 3) return H_UNSUPPORTED_FLAG_START; return H_TOO_HARD; @@ -1645,6 +1648,8 @@ unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr) /* On POWER8 and above, userspace can modify AIL */ if (!cpu_has_feature(CPU_FTR_ARCH_207S)) lpcr &= ~LPCR_AIL; + if ((lpcr & LPCR_AIL) != LPCR_AIL_3) + lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */ /* * On POWER9, allow userspace to enable large decrementer for the From 72c15287210f7433f5fcb55452b05e4b6ccc6c15 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Apr 2021 11:48:38 +1000 Subject: [PATCH 164/302] KVM: PPC: Book3S HV: Prevent radix guests setting LPCR[TC] Prevent radix guests setting LPCR[TC]. This bit only applies to hash partitions. Signed-off-by: Nicholas Piggin Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412014845.1517916-6-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3de8a1f89a7db6..70c6e9c27eb75c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1645,6 +1645,10 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, */ unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr) { + /* LPCR_TC only applies to HPT guests */ + if (kvm_is_radix(kvm)) + lpcr &= ~LPCR_TC; + /* On POWER8 and above, userspace can modify AIL */ if (!cpu_has_feature(CPU_FTR_ARCH_207S)) lpcr &= ~LPCR_AIL; From 4b5f0a0d49e663adf1c7c6f2dd05cb18dd53db8c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Apr 2021 11:48:39 +1000 Subject: [PATCH 165/302] KVM: PPC: Book3S HV: Remove redundant mtspr PSPB This SPR is set to 0 twice when exiting the guest. 
Suggested-by: Fabiano Rosas Signed-off-by: Nicholas Piggin Reviewed-by: Daniel Axtens Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412014845.1517916-7-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 70c6e9c27eb75c..b88df175aa76bd 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3790,7 +3790,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_DSCR, host_dscr); mtspr(SPRN_TIDR, host_tidr); mtspr(SPRN_IAMR, host_iamr); - mtspr(SPRN_PSPB, 0); if (host_amr != vcpu->arch.amr) mtspr(SPRN_AMR, host_amr); From 6c12c4376bbbc89fc84480096ba838e07ab7c405 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Apr 2021 11:48:40 +1000 Subject: [PATCH 166/302] KVM: PPC: Book3S HV: remove unused kvmppc_h_protect argument The va argument is not used in the function or set by its asm caller, so remove it to be safe. Signed-off-by: Nicholas Piggin Reviewed-by: Daniel Axtens Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412014845.1517916-8-npiggin@gmail.com --- arch/powerpc/include/asm/kvm_ppc.h | 3 +-- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 8aacd76bb702b4..9531b1c1b19000 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -767,8 +767,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index, unsigned long avpn); long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu); long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, - unsigned long pte_index, unsigned long avpn, - unsigned long va); + unsigned long pte_index, unsigned long avpn); long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index); long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 88da2764c1bb9c..7af7c70f14680e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -673,8 +673,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) } long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, - unsigned long pte_index, unsigned long avpn, - unsigned long va) + unsigned long pte_index, unsigned long avpn) { struct kvm *kvm = vcpu->kvm; __be64 *hpte; From 0fd85cb83fbd7048d8a024ba1338924349e26fd5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Apr 2021 11:48:41 +1000 Subject: [PATCH 167/302] KVM: PPC: Book3S HV: Fix CONFIG_SPAPR_TCE_IOMMU=n default hcalls This config option causes the warning in init_default_hcalls to fire because the TCE handlers are in the default hcall list but not implemented. 
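For reference, the warning is raised by a sanity check over the default list, along these lines (a simplified sketch of init_default_hcalls() from memory; details may differ slightly from the file):

	static void init_default_hcalls(void)
	{
		int i;

		for (i = 0; default_hcall_list[i]; ++i) {
			unsigned int hcall = default_hcall_list[i];

			/* Fires for H_GET_TCE/H_PUT_TCE when CONFIG_SPAPR_TCE_IOMMU=n */
			WARN_ON(!kvmppc_hcall_impl_hv(hcall));
			__set_bit(hcall / 4, default_enabled_hcalls);
		}
	}

Guarding the TCE entries with #ifdef CONFIG_SPAPR_TCE_IOMMU, as done below, keeps them out of the list in that configuration, so the check no longer fires.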
Signed-off-by: Nicholas Piggin Reviewed-by: Daniel Axtens Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412014845.1517916-9-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index b88df175aa76bd..4a532410e12829 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5412,8 +5412,10 @@ static unsigned int default_hcall_list[] = { H_READ, H_PROTECT, H_BULK_REMOVE, +#ifdef CONFIG_SPAPR_TCE_IOMMU H_GET_TCE, H_PUT_TCE, +#endif H_SET_DABR, H_SET_XDABR, H_CEDE, From 5eee8371828a92a2620453907d6b2b6dc819ab3a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Apr 2021 11:48:42 +1000 Subject: [PATCH 168/302] powerpc/64s: Remove KVM handler support from CBE_RAS interrupts Cell does not support KVM. Signed-off-by: Nicholas Piggin Reviewed-by: Fabiano Rosas Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412014845.1517916-10-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 8082b690e87468..a0515cb829c2ce 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2530,8 +2530,6 @@ EXC_VIRT_NONE(0x5100, 0x100) INT_DEFINE_BEGIN(cbe_system_error) IVEC=0x1200 IHSRR=1 - IKVM_SKIP=1 - IKVM_REAL=1 INT_DEFINE_END(cbe_system_error) EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100) @@ -2701,8 +2699,6 @@ EXC_COMMON_BEGIN(denorm_exception_common) INT_DEFINE_BEGIN(cbe_maintenance) IVEC=0x1600 IHSRR=1 - IKVM_SKIP=1 - IKVM_REAL=1 INT_DEFINE_END(cbe_maintenance) EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100) @@ -2754,8 +2750,6 @@ EXC_COMMON_BEGIN(altivec_assist_common) INT_DEFINE_BEGIN(cbe_thermal) IVEC=0x1800 IHSRR=1 - IKVM_SKIP=1 - IKVM_REAL=1 INT_DEFINE_END(cbe_thermal) EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100) From da487a5d1bee6a30798a8db15986d3d028c8ac92 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Apr 2021 11:48:43 +1000 Subject: [PATCH 169/302] powerpc/64s: remove KVM SKIP test from instruction breakpoint handler The code being executed in KVM_GUEST_MODE_SKIP is hypervisor code with MSR[IR]=0, so the faults of concern are the d-side ones caused by access to guest context by the hypervisor. Instruction breakpoint interrupts are not a concern here. It's unlikely any good would come of causing breaks in this code, but skipping the instruction that caused it won't help matters (e.g., skip the mtmsr that sets MSR[DR]=0 or clears KVM_GUEST_MODE_SKIP). [Paul notes: "the 0x1300 interrupt was dropped from the architecture a long time ago and is not generated by P7, P8, P9 or P10." So add a comment about this in the handler code while we're here. 
] Signed-off-by: Nicholas Piggin Reviewed-by: Daniel Axtens Reviewed-by: Fabiano Rosas Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412014845.1517916-11-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a0515cb829c2ce..358cd4b0c08ebb 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2549,11 +2549,16 @@ EXC_REAL_NONE(0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) #endif - +/** + * Interrupt 0x1300 - Instruction Address Breakpoint Interrupt. + * This has been removed from the ISA before 2.01, which is the earliest + * 64-bit BookS ISA supported, however the G5 / 970 implements this + * interrupt with a non-architected feature available through the support + * processor interface. + */ INT_DEFINE_BEGIN(instruction_breakpoint) IVEC=0x1300 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE - IKVM_SKIP=1 IKVM_REAL=1 #endif INT_DEFINE_END(instruction_breakpoint) From 946cf44ac6ce61378ea02386d39394a06d502f28 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Apr 2021 11:48:44 +1000 Subject: [PATCH 170/302] KVM: PPC: Book3S HV: Ensure MSR[ME] is always set in guest MSR Rather than add the ME bit to the MSR at guest entry, make it clear that the hypervisor does not allow the guest to clear the bit. The ME set is kept in guest entry for now, but a future patch will warn if it's not present. Signed-off-by: Nicholas Piggin Reviewed-by: Daniel Axtens Reviewed-by: Fabiano Rosas Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412014845.1517916-12-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_builtin.c | 3 +++ arch/powerpc/kvm/book3s_hv_nested.c | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 158d309b42a387..41cb03d0bde451 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -662,6 +662,9 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu) void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) { + /* Guest must always run with ME enabled. */ + msr = msr | MSR_ME; + /* * Check for illegal transactional state bit combination * and if we find it, force the TS field to a safe state. diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index d14fe32f167b82..fb03085c902ba8 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -343,7 +343,9 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) vcpu->arch.nested = l2; vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token; vcpu->arch.regs = l2_regs; - vcpu->arch.shregs.msr = vcpu->arch.regs.msr; + + /* Guest must always run with ME enabled. */ + vcpu->arch.shregs.msr = vcpu->arch.regs.msr | MSR_ME; sanitise_hv_regs(vcpu, &l2_hv); restore_hv_regs(vcpu, &l2_hv); From 732f21a3053cf279eb6b85d19b7818a8f1dd2071 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Apr 2021 11:48:45 +1000 Subject: [PATCH 171/302] KVM: PPC: Book3S HV: Ensure MSR[HV] is always clear in guest MSR Rather than clear the HV bit from the MSR at guest entry, make it clear that the hypervisor does not allow the guest to set the bit. The HV clear is kept in guest entry for now, but a future patch will warn if it is set. 
Signed-off-by: Nicholas Piggin Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412014845.1517916-13-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_builtin.c | 4 ++-- arch/powerpc/kvm/book3s_hv_nested.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 41cb03d0bde451..7a0e33a9c980df 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -662,8 +662,8 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu) void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) { - /* Guest must always run with ME enabled. */ - msr = msr | MSR_ME; + /* Guest must always run with ME enabled, HV disabled. */ + msr = (msr | MSR_ME) & ~MSR_HV; /* * Check for illegal transactional state bit combination diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index fb03085c902ba8..60724f6744219d 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -344,8 +344,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token; vcpu->arch.regs = l2_regs; - /* Guest must always run with ME enabled. */ - vcpu->arch.shregs.msr = vcpu->arch.regs.msr | MSR_ME; + /* Guest must always run with ME enabled, HV disabled. */ + vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV; sanitise_hv_regs(vcpu, &l2_hv); restore_hv_regs(vcpu, &l2_hv); From af072b1a9d4d9edc24da84a071b0671e147026cb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 11 Apr 2021 16:39:53 +0000 Subject: [PATCH 172/302] powerpc/signal32: Fix build failure with CONFIG_SPE Add missing fault exit label in unsafe_copy_from_user() in order to avoid following build failure with CONFIG_SPE CC arch/powerpc/kernel/signal_32.o arch/powerpc/kernel/signal_32.c: In function 'restore_user_regs': arch/powerpc/kernel/signal_32.c:565:36: error: macro "unsafe_copy_from_user" requires 4 arguments, but only 3 given 565 | ELF_NEVRREG * sizeof(u32)); | ^ In file included from ./include/linux/uaccess.h:11, from ./include/linux/sched/task.h:11, from ./include/linux/sched/signal.h:9, from ./include/linux/rcuwait.h:6, from ./include/linux/percpu-rwsem.h:7, from ./include/linux/fs.h:33, from ./include/linux/huge_mm.h:8, from ./include/linux/mm.h:707, from arch/powerpc/kernel/signal_32.c:17: ./arch/powerpc/include/asm/uaccess.h:428: note: macro "unsafe_copy_from_user" defined here 428 | #define unsafe_copy_from_user(d, s, l, e) \ | arch/powerpc/kernel/signal_32.c:564:3: error: 'unsafe_copy_from_user' undeclared (first use in this function); did you mean 'raw_copy_from_user'? 
564 | unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, | ^~~~~~~~~~~~~~~~~~~~~ | raw_copy_from_user arch/powerpc/kernel/signal_32.c:564:3: note: each undeclared identifier is reported only once for each function it appears in make[3]: *** [arch/powerpc/kernel/signal_32.o] Error 1 Fixes: 627b72bee84d ("powerpc/signal32: Convert restore_[tm]_user_regs() to user access block") Reported-by: kernel test robot Reported-by: Guenter Roeck Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/aad2cb1801a3cc99bc27081022925b9fc18a0dfb.1618159169.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 5be267b3a13e9f..fff4adc5a2b092 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -562,7 +562,7 @@ static long restore_user_regs(struct pt_regs *regs, if (msr & MSR_SPE) { /* restore spe registers from the stack */ unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32)); + ELF_NEVRREG * sizeof(u32), failed); current->thread.used_spe = true; } else if (current->thread.used_spe) memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); From 75b7c05ebf902632f7f540c3eb0a8945c2d74aab Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Mon, 29 Mar 2021 13:36:43 -0400 Subject: [PATCH 173/302] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall Add support for ND_REGION_ASYNC capability if the device tree indicates 'ibm,hcall-flush-required' property in the NVDIMM node. Flush is done by issuing H_SCM_FLUSH hcall to the hypervisor. If the flush request failed, the hypervisor is expected to to reflect the problem in the subsequent nvdimm H_SCM_HEALTH call. This patch prevents mmap of namespaces with MAP_SYNC flag if the nvdimm requires an explicit flush[1]. References: [1] https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/ndctl.py.data/map_sync.c Signed-off-by: Shivaprasad G Bhat Reviewed-by: Aneesh Kumar K.V [mpe: Use unsigned long / long instead of uint64_t/int64_t] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/161703936121.36.7260632399582101498.stgit@e1fbed493c87 --- Documentation/powerpc/papr_hcalls.rst | 14 ++++++++ arch/powerpc/include/asm/hvcall.h | 3 +- arch/powerpc/platforms/pseries/papr_scm.c | 39 +++++++++++++++++++++++ 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst index 48fcf1255a338a..648f278eea8fc0 100644 --- a/Documentation/powerpc/papr_hcalls.rst +++ b/Documentation/powerpc/papr_hcalls.rst @@ -275,6 +275,20 @@ Health Bitmap Flags: Given a DRC Index collect the performance statistics for NVDIMM and copy them to the resultBuffer. +**H_SCM_FLUSH** + +| Input: *drcIndex, continue-token* +| Out: *continue-token* +| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY* + +Given a DRC Index Flush the data to backend NVDIMM device. + +The hcall returns H_BUSY when the flush takes longer time and the hcall needs +to be issued multiple times in order to be completely serviced. The +*continue-token* from the output to be passed in the argument list of +subsequent hcalls to the hypervisor until the hcall is completely serviced +at which point H_SUCCESS or other error is returned by the hypervisor. + References ========== .. 
[1] "Power Architecture Platform Reference" diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 455e188da26dbb..4430509060185f 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -315,7 +315,8 @@ #define H_SCM_HEALTH 0x400 #define H_SCM_PERFORMANCE_STATS 0x418 #define H_RPT_INVALIDATE 0x448 -#define MAX_HCALL_OPCODE H_RPT_INVALIDATE +#define H_SCM_FLUSH 0x44C +#define MAX_HCALL_OPCODE H_SCM_FLUSH /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 835163f54244ab..ae6f5d80d5ceb9 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -93,6 +93,7 @@ struct papr_scm_priv { uint64_t block_size; int metadata_size; bool is_volatile; + bool hcall_flush_required; uint64_t bound_addr; @@ -117,6 +118,38 @@ struct papr_scm_priv { size_t stat_buffer_len; }; +static int papr_scm_pmem_flush(struct nd_region *nd_region, + struct bio *bio __maybe_unused) +{ + struct papr_scm_priv *p = nd_region_provider_data(nd_region); + unsigned long ret_buf[PLPAR_HCALL_BUFSIZE], token = 0; + long rc; + + dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index); + + do { + rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token); + token = ret_buf[0]; + + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + } while (rc == H_BUSY); + + if (rc) { + dev_err(&p->pdev->dev, "flush error: %lld", rc); + rc = -EIO; + } else { + dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index); + } + + return rc; +} + static LIST_HEAD(papr_nd_regions); static DEFINE_MUTEX(papr_ndr_lock); @@ -943,6 +976,11 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) ndr_desc.num_mappings = 1; ndr_desc.nd_set = &p->nd_set; + if (p->hcall_flush_required) { + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); + ndr_desc.flush = papr_scm_pmem_flush; + } + if (p->is_volatile) p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc); else { @@ -1088,6 +1126,7 @@ static int papr_scm_probe(struct platform_device *pdev) p->block_size = block_size; p->blocks = blocks; p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); + p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); /* We just need to ensure that set cookies are unique across */ uuid_parse(uuid_str, (uuid_t *) uuid); From a5d6a3e73acbd619dd5b7b831762b755f9e2db80 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Sun, 4 Apr 2021 22:01:48 +0530 Subject: [PATCH 174/302] powerpc/mm: Add cond_resched() while removing hpte mappings While removing large number of mappings from hash page tables for large memory systems as soft-lockup is reported because of the time spent inside htap_remove_mapping() like one below: watchdog: BUG: soft lockup - CPU#8 stuck for 23s! 
NIP plpar_hcall+0x38/0x58 LR pSeries_lpar_hpte_invalidate+0x68/0xb0 Call Trace: 0x1fffffffffff000 (unreliable) pSeries_lpar_hpte_removebolted+0x9c/0x230 hash__remove_section_mapping+0xec/0x1c0 remove_section_mapping+0x28/0x3c arch_remove_memory+0xfc/0x150 devm_memremap_pages_release+0x180/0x2f0 devm_action_release+0x30/0x50 release_nodes+0x28c/0x300 device_release_driver_internal+0x16c/0x280 unbind_store+0x124/0x170 drv_attr_store+0x44/0x60 sysfs_kf_write+0x64/0x90 kernfs_fop_write+0x1b0/0x290 __vfs_write+0x3c/0x70 vfs_write+0xd4/0x270 ksys_write+0xdc/0x130 system_call+0x5c/0x70 Fix this by adding a cond_resched() to the loop in htap_remove_mapping() that issues hcall to remove hpte mapping. The call to cond_resched() is issued every HZ jiffies which should prevent the soft-lockup from being reported. Suggested-by: Aneesh Kumar K.V Signed-off-by: Vaibhav Jain Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210404163148.321346-1-vaibhav@linux.ibm.com --- arch/powerpc/mm/book3s64/hash_utils.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7719995323c3f2..12de1906e97bc4 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -338,7 +338,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, int htab_remove_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize) { - unsigned long vaddr; + unsigned long vaddr, time_limit; unsigned int step, shift; int rc; int ret = 0; @@ -351,8 +351,19 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, /* Unmap the full range specificied */ vaddr = ALIGN_DOWN(vstart, step); + time_limit = jiffies + HZ; + for (;vaddr < vend; vaddr += step) { rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize); + + /* + * For large number of mappings introduce a cond_resched() + * to prevent softlockup warnings. + */ + if (time_after(jiffies, time_limit)) { + cond_resched(); + time_limit = jiffies + HZ; + } if (rc == -ENOENT) { ret = -ENOENT; continue; From 2ec13df167040cd153c25c4d96d0ffc573ac4c40 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Apr 2021 13:30:41 +0000 Subject: [PATCH 175/302] powerpc/modules: Load modules closer to kernel text On book3s/32, when STRICT_KERNEL_RWX is selected, modules are allocated on the segment just before kernel text, ie on the 0xb0000000-0xbfffffff when PAGE_OFFSET is 0xc0000000. On the 8xx, TASK_SIZE is 0x80000000. The space between TASK_SIZE and PAGE_OFFSET is not used and could be used for modules. The idea comes from ARM architecture. Having modules just below PAGE_OFFSET offers an opportunity to minimise the distance between kernel text and modules and avoid trampolines in modules to access kernel functions or other module functions. When MODULES_VADDR is defined, powerpc has it's own module_alloc() function. In that function, first try to allocate the module above the limit defined by '_etext - 32M'. Then if the allocation fails, fallback to the entire MODULES area. DEBUG logs in module_32.c without the patch: [ 1572.588822] module_32: Applying ADD relocate section 13 to 12 [ 1572.588891] module_32: Doing plt for call to 0xc00671a4 at 0xcae04024 [ 1572.588964] module_32: Initialized plt for 0xc00671a4 at cae04000 [ 1572.589037] module_32: REL24 value = CAE04000. location = CAE04024 [ 1572.589110] module_32: Location before: 48000001. 
[ 1572.589171] module_32: Location after: 4BFFFFDD. [ 1572.589231] module_32: ie. jump to 03FFFFDC+CAE04024 = CEE04000 [ 1572.589317] module_32: Applying ADD relocate section 15 to 14 [ 1572.589386] module_32: Doing plt for call to 0xc00671a4 at 0xcadfc018 [ 1572.589457] module_32: Initialized plt for 0xc00671a4 at cadfc000 [ 1572.589529] module_32: REL24 value = CADFC000. location = CADFC018 [ 1572.589601] module_32: Location before: 48000000. [ 1572.589661] module_32: Location after: 4BFFFFE8. [ 1572.589723] module_32: ie. jump to 03FFFFE8+CADFC018 = CEDFC000 With the patch: [ 279.404671] module_32: Applying ADD relocate section 13 to 12 [ 279.404741] module_32: REL24 value = C00671B4. location = BF808024 [ 279.404814] module_32: Location before: 48000001. [ 279.404874] module_32: Location after: 4885F191. [ 279.404933] module_32: ie. jump to 0085F190+BF808024 = C00671B4 [ 279.405016] module_32: Applying ADD relocate section 15 to 14 [ 279.405085] module_32: REL24 value = C00671B4. location = BF800018 [ 279.405156] module_32: Location before: 48000000. [ 279.405215] module_32: Location after: 4886719C. [ 279.405275] module_32: ie. jump to 0086719C+BF800018 = C00671B4 We see that with the patch, no plt entries are set. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0c3d5cb8a4dfdf6ca1b8aeb385c01470d6628d55.1617283827.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/module.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index a211b0253cdb49..fab84024650c8d 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -14,6 +14,7 @@ #include #include #include +#include static LIST_HEAD(module_bug_list); @@ -88,12 +89,28 @@ int module_finalize(const Elf_Ehdr *hdr, } #ifdef MODULES_VADDR +static __always_inline void * +__module_alloc(unsigned long size, unsigned long start, unsigned long end) +{ + return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, + PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + __builtin_return_address(0)); +} + void *module_alloc(unsigned long size) { + unsigned long limit = (unsigned long)_etext - SZ_32M; + void *ptr = NULL; + BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, - __builtin_return_address(0)); + /* First try within 32M limit from _etext to avoid branch trampolines */ + if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) + ptr = __module_alloc(size, limit, MODULES_END); + + if (!ptr) + ptr = __module_alloc(size, MODULES_VADDR, MODULES_END); + + return ptr; } #endif From 9132a2e82adc6e5a1c7c7385df3bfb25576bdd80 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Apr 2021 13:30:42 +0000 Subject: [PATCH 176/302] powerpc/8xx: Define a MODULE area below kernel text On the 8xx, TASK_SIZE is 0x80000000. The space between TASK_SIZE and PAGE_OFFSET is not used. In order to benefit from the powerpc specific module_alloc() function which allocate modules with 32 Mbytes from end of kernel text, define MODULES_VADDR and MODULES_END. Set a 256Mb area just below PAGE_OFFSET, like book3s/32. 
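As a rough worked example (assuming the common 8xx setup with PAGE_OFFSET = 0xc0000000): MODULES_END = PAGE_OFFSET = 0xc0000000 and MODULES_VADDR = 0xc0000000 - SZ_256M = 0xb0000000. Kernel text starts at PAGE_OFFSET, so with a few megabytes of text _etext sits a little above 0xc0000000, and the first module_alloc() attempt added in the previous patch (the range from _etext - 32M up to MODULES_END) lands in the top part of this new window, keeping modules within the 32 Mbyte direct branch range of kernel text.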
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a225606d5b3a8bc53fe612ad52c855c60b0a0a58.1617283827.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 478249959baae4..6e4faa0a9b35b7 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -172,6 +172,9 @@ #define mmu_linear_psize MMU_PAGE_8M +#define MODULES_VADDR (PAGE_OFFSET - SZ_256M) +#define MODULES_END PAGE_OFFSET + #ifndef __ASSEMBLY__ #include From 80edc68e0479bafdc4869ec3351e42316b824596 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Apr 2021 13:30:43 +0000 Subject: [PATCH 177/302] powerpc/32s: Define a MODULE area below kernel text all the time On book3s/32, the segment below kernel text is used for module allocation when CONFIG_STRICT_KERNEL_RWX is defined. In order to benefit from the powerpc specific module_alloc() function which allocate modules with 32 Mbytes from end of kernel text, use that segment below PAGE_OFFSET at all time. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a46dcdd39a9e80b012d86c294c4e5cd8d31665f3.1617283827.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/book3s/32/pgtable.h | 2 -- arch/powerpc/mm/book3s32/mmu.c | 7 ------- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 36d7c56df91d81..7c5c72cbf19f0c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1220,7 +1220,7 @@ config TASK_SIZE_BOOL config TASK_SIZE hex "Size of user task space" if TASK_SIZE_BOOL default "0x80000000" if PPC_8xx - default "0xb0000000" if PPC_BOOK3S_32 && STRICT_KERNEL_RWX + default "0xb0000000" if PPC_BOOK3S_32 default "0xc0000000" endmenu diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 415ae29fa73a2f..83c65845a1a9d6 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -194,10 +194,8 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #define VMALLOC_END ioremap_bot #endif -#ifdef CONFIG_STRICT_KERNEL_RWX #define MODULES_END ALIGN_DOWN(PAGE_OFFSET, SZ_256M) #define MODULES_VADDR (MODULES_END - SZ_256M) -#endif #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index a0db398b5c2658..159930351d9f96 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -184,17 +184,10 @@ static bool is_module_segment(unsigned long addr) { if (!IS_ENABLED(CONFIG_MODULES)) return false; -#ifdef MODULES_VADDR if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M)) return false; if (addr > ALIGN(MODULES_END, SZ_256M) - 1) return false; -#else - if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M)) - return false; - if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1) - return false; -#endif return true; } From 7f262b4dcf7edf75097c3946e676d6c6d77fc599 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Thu, 8 Apr 2021 11:39:51 +0800 Subject: [PATCH 178/302] powerpc/security: Make symbol 'stf_barrier' static The sparse tool complains as follows: arch/powerpc/kernel/security.c:253:6: warning: symbol 'stf_barrier' was not declared. Should it be static? 
This symbol is not used outside of security.c, so this commit marks it static. Signed-off-by: Li Huafei Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210408033951.28369-1-lihuafei1@huawei.com --- arch/powerpc/kernel/security.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 287286ddf7dceb..0fdfcdd9d880cc 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -252,7 +252,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c static enum stf_barrier_type stf_enabled_flush_types; static bool no_stf_barrier; -bool stf_barrier; +static bool stf_barrier; static int __init handle_no_stf_barrier(char *p) { From f6f1f48e8b3b242dfa684d6e1b930d239d87533a Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Thu, 8 Apr 2021 11:58:02 +0800 Subject: [PATCH 179/302] powerpc/mce: Make symbol 'mce_ue_event_work' static The sparse tool complains as follows: arch/powerpc/kernel/mce.c:43:1: warning: symbol 'mce_ue_event_work' was not declared. Should it be static? This symbol is not used outside of mce.c, so this commit marks it static. Signed-off-by: Li Huafei Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210408035802.31853-1-lihuafei1@huawei.com --- arch/powerpc/kernel/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 11f0cae086edba..6aa6b1cda1edda 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -40,7 +40,7 @@ static struct irq_work mce_ue_event_irq_work = { .func = machine_check_ue_irq_work, }; -DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); +static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); static BLOCKING_NOTIFIER_HEAD(mce_notifier_list); From 13ddd0e3acf988a98b46800178ae691640b0cd00 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 7 Apr 2021 20:57:12 +0800 Subject: [PATCH 180/302] macintosh/windfarm: Make symbol 'pm121_sys_state' static The sparse tool complains as follows: drivers/macintosh/windfarm_pm121.c:436:24: warning: symbol 'pm121_sys_state' was not declared. Should it be static? This symbol is not used outside of windfarm_pm121.c, so this commit marks it static. Reported-by: Hulk Robot Signed-off-by: Yu Kuai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210407125712.4138033-1-yukuai3@huawei.com --- drivers/macintosh/windfarm_pm121.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c index ab467b9c31be7e..ba1ec6fc11d211 100644 --- a/drivers/macintosh/windfarm_pm121.c +++ b/drivers/macintosh/windfarm_pm121.c @@ -433,7 +433,7 @@ struct pm121_sys_state { struct wf_pid_state pid; }; -struct pm121_sys_state *pm121_sys_state[N_LOOPS] = {}; +static struct pm121_sys_state *pm121_sys_state[N_LOOPS] = {}; /* * ****** CPU Fans Control Loop ****** From 4204ecd598cb0a044e6fcfd48e569080955347f4 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 7 Apr 2021 20:57:38 +0800 Subject: [PATCH 181/302] windfarm: make symbol 'wf_thread' static The sparse tool complains as follows: drivers/macintosh/windfarm_core.c:59:20: warning: symbol 'wf_thread' was not declared. Should it be static? This symbol is not used outside of windfarm_core.c, so this commit marks it static. 
Reported-by: Hulk Robot Signed-off-by: Yu Kuai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210407125738.4138480-1-yukuai3@huawei.com --- drivers/macintosh/windfarm_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index 77612303841e4c..07f91ec1f960e4 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -56,7 +56,7 @@ static BLOCKING_NOTIFIER_HEAD(wf_client_list); static int wf_client_count; static unsigned int wf_overtemp; static unsigned int wf_overtemp_counter; -struct task_struct *wf_thread; +static struct task_struct *wf_thread; static struct platform_device wf_platform_device = { .name = "windfarm", From 95d143923379ffb0e706b064305681d44c05ec4b Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 7 Apr 2021 20:58:03 +0800 Subject: [PATCH 182/302] macintosh/via-pmu: Make some symbols static The sparse tool complains as follows: drivers/macintosh/via-pmu.c:183:5: warning: symbol 'pmu_cur_battery' was not declared. Should it be static? drivers/macintosh/via-pmu.c:190:5: warning: symbol '__fake_sleep' was not declared. Should it be static? These symbols are not used outside of via-pmu.c, so this commit marks them static. Reported-by: Hulk Robot Signed-off-by: Yu Kuai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210407125803.4138837-1-yukuai3@huawei.com --- drivers/macintosh/via-pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 73e6ae88fafd4e..478766434919ba 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -180,14 +180,14 @@ static struct proc_dir_entry *proc_pmu_options; static int option_server_mode; int pmu_battery_count; -int pmu_cur_battery; +static int pmu_cur_battery; unsigned int pmu_power_flags = PMU_PWR_AC_PRESENT; struct pmu_battery_info pmu_batteries[PMU_MAX_BATTERIES]; static int query_batt_timer = BATTERY_POLLING_COUNT; static struct adb_request batt_req; static struct proc_dir_entry *proc_pmu_batt[PMU_MAX_BATTERIES]; -int __fake_sleep; +static int __fake_sleep; int asleep; #ifdef CONFIG_ADB From 078277acbd7c3fdb25c01a3cd5b4a1a875a1ab2f Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 7 Apr 2021 20:59:03 +0800 Subject: [PATCH 183/302] powerpc/smp: Make some symbols static The sparse tool complains as follows: arch/powerpc/kernel/smp.c:86:1: warning: symbol '__pcpu_scope_cpu_coregroup_map' was not declared. Should it be static? arch/powerpc/kernel/smp.c:125:1: warning: symbol '__pcpu_scope_thread_group_l1_cache_map' was not declared. Should it be static? arch/powerpc/kernel/smp.c:132:1: warning: symbol '__pcpu_scope_thread_group_l2_cache_map' was not declared. Should it be static? These symbols are not used outside of smp.c, so this commit marks them static. 
Reported-by: Hulk Robot Signed-off-by: Yu Kuai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210407125903.4139663-1-yukuai3@huawei.com --- arch/powerpc/kernel/smp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 5a4d59a1070d5a..63ccc70bdd0d9c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -83,7 +83,7 @@ DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); -DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map); +static DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); @@ -122,14 +122,14 @@ static struct thread_groups_list tgl[NR_CPUS] __initdata; * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to * the set its siblings that share the L1-cache. */ -DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); +static DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); /* * On some big-cores system, thread_group_l2_cache_map for each CPU * corresponds to the set its siblings within the core that share the * L2-cache. */ -DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); +static DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); /* SMP operations for this machine */ struct smp_ops_t *smp_ops; From 7d348494136c8b47c39d1f7ccba28c47d5094a54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Wed, 31 Mar 2021 16:45:07 +0200 Subject: [PATCH 184/302] powerpc/xive: Introduce an IPI interrupt domain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The IPI interrupt is a special case of the XIVE IRQ domain. When mapping and unmapping the interrupts in the Linux interrupt number space, the HW interrupt number 0 (XIVE_IPI_HW_IRQ) is checked to distinguish the IPI interrupt from other interrupts of the system. Simplify the XIVE interrupt domain by introducing a specific domain for the IPI. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331144514.892250-3-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 79 ++++++++++++++++++------------- 1 file changed, 46 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 595310e056f4de..e6abc38b8b4018 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1067,24 +1067,58 @@ static struct irq_chip xive_ipi_chip = { .irq_unmask = xive_ipi_do_nothing, }; -static void __init xive_request_ipi(void) +/* + * IPIs are marked per-cpu. 
We use separate HW interrupts under the + * hood but associated with the same "linux" interrupt + */ +static int xive_ipi_irq_domain_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) { + irq_set_chip_and_handler(virq, &xive_ipi_chip, handle_percpu_irq); + return 0; +} + +static const struct irq_domain_ops xive_ipi_irq_domain_ops = { + .map = xive_ipi_irq_domain_map, +}; + +static int __init xive_request_ipi(void) +{ + struct fwnode_handle *fwnode; + struct irq_domain *ipi_domain; unsigned int virq; + int ret = -ENOMEM; - /* - * Initialization failed, move on, we might manage to - * reach the point where we display our errors before - * the system falls appart - */ - if (!xive_irq_domain) - return; + fwnode = irq_domain_alloc_named_fwnode("XIVE-IPI"); + if (!fwnode) + goto out; + + ipi_domain = irq_domain_create_linear(fwnode, 1, + &xive_ipi_irq_domain_ops, NULL); + if (!ipi_domain) + goto out_free_fwnode; /* Initialize it */ - virq = irq_create_mapping(xive_irq_domain, XIVE_IPI_HW_IRQ); + virq = irq_create_mapping(ipi_domain, XIVE_IPI_HW_IRQ); + if (!virq) { + ret = -EINVAL; + goto out_free_domain; + } + xive_ipi_irq = virq; - WARN_ON(request_irq(virq, xive_muxed_ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL)); + ret = request_irq(virq, xive_muxed_ipi_action, + IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL); + + WARN(ret < 0, "Failed to request IPI %d: %d\n", virq, ret); + return ret; + +out_free_domain: + irq_domain_remove(ipi_domain); +out_free_fwnode: + irq_domain_free_fwnode(fwnode); +out: + return ret; } static int xive_setup_cpu_ipi(unsigned int cpu) @@ -1178,19 +1212,6 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, */ irq_clear_status_flags(virq, IRQ_LEVEL); -#ifdef CONFIG_SMP - /* IPIs are special and come up with HW number 0 */ - if (hw == XIVE_IPI_HW_IRQ) { - /* - * IPIs are marked per-cpu. We use separate HW interrupts under - * the hood but associated with the same "linux" interrupt - */ - irq_set_chip_and_handler(virq, &xive_ipi_chip, - handle_percpu_irq); - return 0; - } -#endif - rc = xive_irq_alloc_data(virq, hw); if (rc) return rc; @@ -1202,15 +1223,7 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq) { - struct irq_data *data = irq_get_irq_data(virq); - unsigned int hw_irq; - - /* XXX Assign BAD number */ - if (!data) - return; - hw_irq = (unsigned int)irqd_to_hwirq(data); - if (hw_irq != XIVE_IPI_HW_IRQ) - xive_irq_free_data(virq); + xive_irq_free_data(virq); } static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct, From 1835e72942b5aa779c8ada62aaeba03ab66d92c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Wed, 31 Mar 2021 16:45:08 +0200 Subject: [PATCH 185/302] powerpc/xive: Remove useless check on XIVE_IPI_HW_IRQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The IPI interrupt has its own domain now. Testing the HW interrupt number is not needed anymore. 
Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331144514.892250-4-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index e6abc38b8b4018..3badef7ec91ab0 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1421,13 +1421,12 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) struct irq_desc *desc = irq_to_desc(irq); struct irq_data *d = irq_desc_get_irq_data(desc); struct xive_irq_data *xd; - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); /* * Ignore anything that isn't a XIVE irq and ignore * IPIs, so can just be dropped. */ - if (d->domain != xive_irq_domain || hw_irq == XIVE_IPI_HW_IRQ) + if (d->domain != xive_irq_domain) continue; /* From 5159d9872823230669b7949ba3caf18c4c314846 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Wed, 31 Mar 2021 16:45:09 +0200 Subject: [PATCH 186/302] powerpc/xive: Simplify xive_core_debug_show() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the IPI interrupt has its own domain, the checks on the HW interrupt number XIVE_IPI_HW_IRQ and on the chip can be replaced by a check on the domain. Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331144514.892250-5-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 3badef7ec91ab0..09da16e40320a2 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1604,17 +1604,14 @@ static void xive_debug_show_cpu(struct seq_file *m, int cpu) seq_puts(m, "\n"); } -static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d) +static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d) { - struct irq_chip *chip = irq_data_get_irq_chip(d); + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); int rc; u32 target; u8 prio; u32 lirq; - if (!is_xive_irq(chip)) - return; - rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); if (rc) { seq_printf(m, "IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); @@ -1652,16 +1649,9 @@ static int xive_core_debug_show(struct seq_file *m, void *private) for_each_irq_desc(i, desc) { struct irq_data *d = irq_desc_get_irq_data(desc); - unsigned int hw_irq; - - if (!d) - continue; - - hw_irq = (unsigned int)irqd_to_hwirq(d); - /* IPIs are special (HW number 0) */ - if (hw_irq != XIVE_IPI_HW_IRQ) - xive_debug_show_irq(m, hw_irq, d); + if (d->domain == xive_irq_domain) + xive_debug_show_irq(m, d); } return 0; } From a74ce5926b20cd0e6d624a9b2527073a96dfed7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Wed, 31 Mar 2021 16:45:10 +0200 Subject: [PATCH 187/302] powerpc/xive: Drop check on irq_data in xive_core_debug_show() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When looping on IRQ descriptor, irq_data is always valid. 
Fixes: 930914b7d528 ("powerpc/xive: Add a debugfs file to dump internal XIVE state") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331144514.892250-6-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 09da16e40320a2..41753aaa2507ec 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1611,6 +1611,8 @@ static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d) u32 target; u8 prio; u32 lirq; + struct xive_irq_data *xd; + u64 val; rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); if (rc) { @@ -1621,17 +1623,14 @@ static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d) seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", hw_irq, target, prio, lirq); - if (d) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - u64 val = xive_esb_read(xd, XIVE_ESB_GET); - - seq_printf(m, "flags=%c%c%c PQ=%c%c", - xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', - xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', - xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', - val & XIVE_ESB_VAL_P ? 'P' : '-', - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); - } + xd = irq_data_get_irq_handler_data(d); + val = xive_esb_read(xd, XIVE_ESB_GET); + seq_printf(m, "flags=%c%c%c PQ=%c%c", + xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', + xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', + xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', + val & XIVE_ESB_VAL_P ? 'P' : '-', + val & XIVE_ESB_VAL_Q ? 'Q' : '-'); seq_puts(m, "\n"); } From 6bf66eb8f404050030805c65cf39a810892f5f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Wed, 31 Mar 2021 16:45:11 +0200 Subject: [PATCH 188/302] powerpc/xive: Simplify the dump of XIVE interrupts under xmon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the xmon routine under XIVE subsystem and rework the loop on the interrupts taking into account the xive_irq_domain to filter out IPIs. 
Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331144514.892250-7-clg@kaod.org --- arch/powerpc/include/asm/xive.h | 1 + arch/powerpc/sysdev/xive/common.c | 14 ++++++++++++++ arch/powerpc/xmon/xmon.c | 28 ++-------------------------- 3 files changed, 17 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 9a312b975ca826..aa094a8655b036 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -102,6 +102,7 @@ void xive_flush_interrupt(void); /* xmon hook */ void xmon_xive_do_dump(int cpu); int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d); +void xmon_xive_get_irq_all(void); /* APIs used by KVM */ u32 xive_native_default_eq_shift(void); diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 41753aaa2507ec..ba2cc14337381f 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -289,6 +289,20 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) return 0; } +void xmon_xive_get_irq_all(void) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_data *d = irq_desc_get_irq_data(desc); + unsigned int hwirq = (unsigned int)irqd_to_hwirq(d); + + if (d->domain == xive_irq_domain) + xmon_xive_get_irq_config(hwirq, d); + } +} + #endif /* CONFIG_XMON */ static unsigned int xive_get_irq(void) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 3fe37495f63dc7..80fbf8968f778b 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2727,30 +2727,6 @@ static void dump_all_xives(void) dump_one_xive(cpu); } -static void dump_one_xive_irq(u32 num, struct irq_data *d) -{ - xmon_xive_get_irq_config(num, d); -} - -static void dump_all_xive_irq(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_data *d = irq_desc_get_irq_data(desc); - unsigned int hwirq; - - if (!d) - continue; - - hwirq = (unsigned int)irqd_to_hwirq(d); - /* IPIs are special (HW number 0) */ - if (hwirq) - dump_one_xive_irq(hwirq, d); - } -} - static void dump_xives(void) { unsigned long num; @@ -2767,9 +2743,9 @@ static void dump_xives(void) return; } else if (c == 'i') { if (scanhex(&num)) - dump_one_xive_irq(num, NULL); + xmon_xive_get_irq_config(num, NULL); else - dump_all_xive_irq(); + xmon_xive_get_irq_all(); return; } From 33e4bc5946432a4ac173fd08e8e30a13ab94d06d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Wed, 31 Mar 2021 16:45:12 +0200 Subject: [PATCH 189/302] powerpc/xive: Fix xmon command "dxi" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When under xmon, the "dxi" command dumps the state of the XIVE interrupts. If an interrupt number is specified, only the state of the associated XIVE interrupt is dumped. This form of the command lacks an irq_data parameter which is nevertheless used by xmon_xive_get_irq_config(), leading to an xmon crash. Fix that by doing a lookup in the system IRQ mapping to query the IRQ descriptor data. Invalid interrupt numbers, or not belonging to the XIVE IRQ domain, OPAL event interrupt number for instance, should be caught by the previous query done at the firmware level. 
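For illustration only (the interrupt number below is made up), the two forms of the command are:

    0:mon> dxi           # dump the state of all XIVE interrupts
    0:mon> dxi 1234      # dump only HW IRQ 0x1234 (the argument is parsed as hex)

It is the second form that crashed before this change, because xmon_xive_get_irq_config() was handed a NULL irq_data pointer.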
Fixes: 97ef27507793 ("powerpc/xive: Fix xmon support on the PowerNV platform") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Cédric Le Goater Tested-by: Greg Kurz Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331144514.892250-8-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index ba2cc14337381f..a9e9e57e8e73ec 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -253,17 +253,20 @@ notrace void xmon_xive_do_dump(int cpu) xmon_printf("\n"); } +static struct irq_data *xive_get_irq_data(u32 hw_irq) +{ + unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq); + + return irq ? irq_get_irq_data(irq) : NULL; +} + int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) { - struct irq_chip *chip = irq_data_get_irq_chip(d); int rc; u32 target; u8 prio; u32 lirq; - if (!is_xive_irq(chip)) - return -EINVAL; - rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); if (rc) { xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); @@ -273,6 +276,9 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", hw_irq, target, prio, lirq); + if (!d) + d = xive_get_irq_data(hw_irq); + if (d) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); u64 val = xive_esb_read(xd, XIVE_ESB_GET); From 7dcc37b3eff97379b194adb17eb9a8270512dd1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Wed, 31 Mar 2021 16:45:13 +0200 Subject: [PATCH 190/302] powerpc/xive: Map one IPI interrupt per node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ipistorm [*] can be used to benchmark the raw interrupt rate of an interrupt controller by measuring the number of IPIs a system can sustain. When applied to the XIVE interrupt controller of POWER9 and POWER10 systems, a significant drop of the interrupt rate can be observed when crossing the second node boundary. This is due to the fact that a single IPI interrupt is used for all CPUs of the system. The structure is shared and the cache line updates impact greatly the traffic between nodes and the overall IPI performance. As a workaround, the impact can be reduced by deactivating the IRQ lockup detector ("noirqdebug") which does a lot of accounting in the Linux IRQ descriptor structure and is responsible for most of the performance penalty. As a fix, this proposal allocates an IPI interrupt per node, to be shared by all CPUs of that node. It solves the scaling issue, the IRQ lockup detector still has an impact but the XIVE interrupt rate scales linearly. It also improves the "noirqdebug" case as showed in the tables below. 
* P9 DD2.2 - 2s * 64 threads "noirqdebug" Mint/s Mint/s chips cpus IPI/sys IPI/chip IPI/chip IPI/sys -------------------------------------------------------------- 1 0-15 4.984023 4.875405 4.996536 5.048892 0-31 10.879164 10.544040 10.757632 11.037859 0-47 15.345301 14.688764 14.926520 15.310053 0-63 17.064907 17.066812 17.613416 17.874511 2 0-79 11.768764 21.650749 22.689120 22.566508 0-95 10.616812 26.878789 28.434703 28.320324 0-111 10.151693 31.397803 31.771773 32.388122 0-127 9.948502 33.139336 34.875716 35.224548 * P10 DD1 - 4s (not homogeneous) 352 threads "noirqdebug" Mint/s Mint/s chips cpus IPI/sys IPI/chip IPI/chip IPI/sys -------------------------------------------------------------- 1 0-15 2.409402 2.364108 2.383303 2.395091 0-31 6.028325 6.046075 6.089999 6.073750 0-47 8.655178 8.644531 8.712830 8.724702 0-63 11.629652 11.735953 12.088203 12.055979 0-79 14.392321 14.729959 14.986701 14.973073 0-95 12.604158 13.004034 17.528748 17.568095 2 0-111 9.767753 13.719831 19.968606 20.024218 0-127 6.744566 16.418854 22.898066 22.995110 0-143 6.005699 19.174421 25.425622 25.417541 0-159 5.649719 21.938836 27.952662 28.059603 0-175 5.441410 24.109484 31.133915 31.127996 3 0-191 5.318341 24.405322 33.999221 33.775354 0-207 5.191382 26.449769 36.050161 35.867307 0-223 5.102790 29.356943 39.544135 39.508169 0-239 5.035295 31.933051 42.135075 42.071975 0-255 4.969209 34.477367 44.655395 44.757074 4 0-271 4.907652 35.887016 47.080545 47.318537 0-287 4.839581 38.076137 50.464307 50.636219 0-303 4.786031 40.881319 53.478684 53.310759 0-319 4.743750 43.448424 56.388102 55.973969 0-335 4.709936 45.623532 59.400930 58.926857 0-351 4.681413 45.646151 62.035804 61.830057 [*] https://github.com/antonblanchard/ipistorm Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331144514.892250-9-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 60 +++++++++++++++++++----- arch/powerpc/sysdev/xive/xive-internal.h | 2 - 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a9e9e57e8e73ec..edd000bcc9f633 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -63,8 +63,19 @@ static const struct xive_ops *xive_ops; static struct irq_domain *xive_irq_domain; #ifdef CONFIG_SMP -/* The IPIs all use the same logical irq number */ -static u32 xive_ipi_irq; +/* The IPIs use the same logical irq number when on the same chip */ +static struct xive_ipi_desc { + unsigned int irq; + char name[16]; +} *xive_ipis; + +/* + * Use early_cpu_to_node() for hot-plugged CPUs + */ +static unsigned int xive_ipi_cpu_to_irq(unsigned int cpu) +{ + return xive_ipis[early_cpu_to_node(cpu)].irq; +} #endif /* Xive state for each CPU */ @@ -1106,33 +1117,53 @@ static int __init xive_request_ipi(void) { struct fwnode_handle *fwnode; struct irq_domain *ipi_domain; - unsigned int virq; + unsigned int node; int ret = -ENOMEM; fwnode = irq_domain_alloc_named_fwnode("XIVE-IPI"); if (!fwnode) goto out; - ipi_domain = irq_domain_create_linear(fwnode, 1, + ipi_domain = irq_domain_create_linear(fwnode, nr_node_ids, &xive_ipi_irq_domain_ops, NULL); if (!ipi_domain) goto out_free_fwnode; - /* Initialize it */ - virq = irq_create_mapping(ipi_domain, XIVE_IPI_HW_IRQ); - if (!virq) { - ret = -EINVAL; + xive_ipis = kcalloc(nr_node_ids, sizeof(*xive_ipis), GFP_KERNEL | __GFP_NOFAIL); + if (!xive_ipis) goto out_free_domain; - } - xive_ipi_irq = virq; + for_each_node(node) { + struct 
xive_ipi_desc *xid = &xive_ipis[node]; + irq_hw_number_t ipi_hwirq = node; + + /* Skip nodes without CPUs */ + if (cpumask_empty(cpumask_of_node(node))) + continue; + + /* + * Map one IPI interrupt per node for all cpus of that node. + * Since the HW interrupt number doesn't have any meaning, + * simply use the node number. + */ + xid->irq = irq_create_mapping(ipi_domain, ipi_hwirq); + if (!xid->irq) { + ret = -EINVAL; + goto out_free_xive_ipis; + } + + snprintf(xid->name, sizeof(xid->name), "IPI-%d", node); + + ret = request_irq(xid->irq, xive_muxed_ipi_action, + IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL); - ret = request_irq(virq, xive_muxed_ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL); + WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); + } - WARN(ret < 0, "Failed to request IPI %d: %d\n", virq, ret); return ret; +out_free_xive_ipis: + kfree(xive_ipis); out_free_domain: irq_domain_remove(ipi_domain); out_free_fwnode: @@ -1143,6 +1174,7 @@ static int __init xive_request_ipi(void) static int xive_setup_cpu_ipi(unsigned int cpu) { + unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); struct xive_cpu *xc; int rc; @@ -1185,6 +1217,8 @@ static int xive_setup_cpu_ipi(unsigned int cpu) static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) { + unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); + /* Disable the IPI and free the IRQ data */ /* Already cleaned up ? */ diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index 9cf57c722faa3d..b3a456fdd3a532 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -5,8 +5,6 @@ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H -#define XIVE_IPI_HW_IRQ 0 /* interrupt source # for IPIs */ - /* * A "disabled" interrupt should never fire, to catch problems * we set its logical number to this From fd6db2892ebaa1383a93b4a609c65b96e615510a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Wed, 31 Mar 2021 16:45:14 +0200 Subject: [PATCH 191/302] powerpc/xive: Modernize XIVE-IPI domain with an 'alloc' handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of calling irq_create_mapping() to map the IPI for a node, introduce an 'alloc' handler. This is usually an extension to support hierarchy irq_domains which is not exactly the case for XIVE-IPI domain. However, we can now use the irq_domain_alloc_irqs() routine which allocates the IRQ descriptor on the specified node, even better for cache performance on multi node machines. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331144514.892250-10-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index edd000bcc9f633..b025f42bf1f350 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1102,15 +1102,26 @@ static struct irq_chip xive_ipi_chip = { * IPIs are marked per-cpu. 
We use separate HW interrupts under the * hood but associated with the same "linux" interrupt */ -static int xive_ipi_irq_domain_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t hw) +struct xive_ipi_alloc_info { + irq_hw_number_t hwirq; +}; + +static int xive_ipi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - irq_set_chip_and_handler(virq, &xive_ipi_chip, handle_percpu_irq); + struct xive_ipi_alloc_info *info = arg; + int i; + + for (i = 0; i < nr_irqs; i++) { + irq_domain_set_info(domain, virq + i, info->hwirq + i, &xive_ipi_chip, + domain->host_data, handle_percpu_irq, + NULL, NULL); + } return 0; } static const struct irq_domain_ops xive_ipi_irq_domain_ops = { - .map = xive_ipi_irq_domain_map, + .alloc = xive_ipi_irq_domain_alloc, }; static int __init xive_request_ipi(void) @@ -1135,7 +1146,7 @@ static int __init xive_request_ipi(void) for_each_node(node) { struct xive_ipi_desc *xid = &xive_ipis[node]; - irq_hw_number_t ipi_hwirq = node; + struct xive_ipi_alloc_info info = { node }; /* Skip nodes without CPUs */ if (cpumask_empty(cpumask_of_node(node))) @@ -1146,9 +1157,9 @@ static int __init xive_request_ipi(void) * Since the HW interrupt number doesn't have any meaning, * simply use the node number. */ - xid->irq = irq_create_mapping(ipi_domain, ipi_hwirq); - if (!xid->irq) { - ret = -EINVAL; + xid->irq = irq_domain_alloc_irqs(ipi_domain, 1, node, &info); + if (xid->irq < 0) { + ret = xid->irq; goto out_free_xive_ipis; } From 5ae5bc12d0728db60a0aa9b62160ffc038875f1a Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 12 Apr 2021 13:22:50 +0530 Subject: [PATCH 192/302] powerpc/eeh: Fix EEH handling for hugepages in ioremap space. During the EEH MMIO error checking, the current implementation fails to map the (virtual) MMIO address back to the pci device on radix with hugepage mappings for I/O. This results into failure to dispatch EEH event with no recovery even when EEH capability has been enabled on the device. eeh_check_failure(token) # token = virtual MMIO address addr = eeh_token_to_phys(token); edev = eeh_addr_cache_get_dev(addr); if (!edev) return 0; eeh_dev_check_failure(edev); <= Dispatch the EEH event In case of hugepage mappings, eeh_token_to_phys() has a bug in virt -> phys translation that results in wrong physical address, which is then passed to eeh_addr_cache_get_dev() to match it against cached pci I/O address ranges to get to a PCI device. Hence, it fails to find a match and the EEH event never gets dispatched leaving the device in failed state. The commit 33439620680be ("powerpc/eeh: Handle hugepages in ioremap space") introduced following logic to translate virt to phys for hugepage mappings: eeh_token_to_phys(): + pa = pte_pfn(*ptep); + + /* On radix we can do hugepage mappings for io, so handle that */ + if (hugepage_shift) { + pa <<= hugepage_shift; <= This is wrong + pa |= token & ((1ul << hugepage_shift) - 1); + } This patch fixes the virt -> phys translation in eeh_token_to_phys() function. 
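To restate the fixed logic (an illustrative rewrite of the hunk below, not additional code): pte_pfn() returns a frame number in PAGE_SIZE units even when the PTE maps a hugepage, so the physical address must always be built as

	pa = (pte_pfn(*ptep) << PAGE_SHIFT) | (token & ((1ul << hugepage_shift) - 1));

with hugepage_shift falling back to PAGE_SHIFT for regular mappings. Shifting the frame number by hugepage_shift instead multiplies it by the hugepage size rather than by PAGE_SIZE, which is why the resulting address could never match a cached I/O range.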
$ cat /sys/kernel/debug/powerpc/eeh_address_cache mem addr range [0x0000040080000000-0x00000400807fffff]: 0030:01:00.1 mem addr range [0x0000040080800000-0x0000040080ffffff]: 0030:01:00.1 mem addr range [0x0000040081000000-0x00000400817fffff]: 0030:01:00.0 mem addr range [0x0000040081800000-0x0000040081ffffff]: 0030:01:00.0 mem addr range [0x0000040082000000-0x000004008207ffff]: 0030:01:00.1 mem addr range [0x0000040082080000-0x00000400820fffff]: 0030:01:00.0 mem addr range [0x0000040082100000-0x000004008210ffff]: 0030:01:00.1 mem addr range [0x0000040082110000-0x000004008211ffff]: 0030:01:00.0 Above is the list of cached io address ranges of pci 0030:01:00.. Before this patch: Tracing 'arg1' of function eeh_addr_cache_get_dev() during error injection clearly shows that 'addr=' contains the wrong physical address: kworker/u16:0-7 [001] .... 108.883775: eeh_addr_cache_get_dev: (eeh_addr_cache_get_dev+0xc/0xf0) addr=0x80103000a510 dmesg shows no EEH recovery messages: [ 108.563768] bnx2x: [bnx2x_timer:5801(eth2)]MFW seems hanged: drv_pulse (0x9ae) != mcp_pulse (0x7fff) [ 108.563788] bnx2x: [bnx2x_hw_stats_update:870(eth2)]NIG timer max (4294967295) [ 108.883788] bnx2x: [bnx2x_acquire_hw_lock:2013(eth1)]lock_status 0xffffffff resource_bit 0x1 [ 108.884407] bnx2x 0030:01:00.0 eth1: MDC/MDIO access timeout [ 108.884976] bnx2x 0030:01:00.0 eth1: MDC/MDIO access timeout <..> After this patch: eeh_addr_cache_get_dev() trace shows the correct physical address: -0 [001] ..s. 1043.123828: eeh_addr_cache_get_dev: (eeh_addr_cache_get_dev+0xc/0xf0) addr=0x40080bc7cd8 dmesg logs show EEH recovery getting triggered: [ 964.323980] bnx2x: [bnx2x_timer:5801(eth2)]MFW seems hanged: drv_pulse (0x746f) != mcp_pulse (0x7fff) [ 964.323991] EEH: Recovering PHB#30-PE#10000 [ 964.324002] EEH: PE location: N/A, PHB location: N/A [ 964.324006] EEH: Frozen PHB#30-PE#10000 detected <..> Fixes: 33439620680b ("powerpc/eeh: Handle hugepages in ioremap space") Cc: stable@vger.kernel.org # v5.3+ Reported-by: Dominic DeMarco Signed-off-by: Mahesh Salgaonkar Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/161821396263.48361.2796709239866588652.stgit@jupiter --- arch/powerpc/kernel/eeh.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 01dbb44a0fe380..9058a26df29ccc 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -362,14 +362,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) pa = pte_pfn(*ptep); /* On radix we can do hugepage mappings for io, so handle that */ - if (hugepage_shift) { - pa <<= hugepage_shift; - pa |= token & ((1ul << hugepage_shift) - 1); - } else { - pa <<= PAGE_SHIFT; - pa |= token & (PAGE_SIZE - 1); - } + if (!hugepage_shift) + hugepage_shift = PAGE_SHIFT; + pa <<= PAGE_SHIFT; + pa |= token & ((1ul << hugepage_shift) - 1); return pa; } From c13ff6f3251318f5e1ff5b1a6d05f76996db672a Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 8 Apr 2021 09:06:26 -0500 Subject: [PATCH 193/302] powerpc/rtas: improve ppc_rtas_rmo_buf_show documentation Add kerneldoc for ppc_rtas_rmo_buf_show(), the callback for /proc/powerpc/rtas/rmo_buffer, explaining its expected use.
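For context (the values shown are made up), the file prints the base address and size of the region as two hex fields, matching the seq_printf() format below:

    $ cat /proc/powerpc/rtas/rmo_buffer
    000000000ee30000 10000

A consumer such as librtas is expected to parse this and mmap() the corresponding range of /dev/mem to obtain its RTAS work areas.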
Signed-off-by: Nathan Lynch Reviewed-by: Alexey Kardashevskiy Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210408140630.205502-2-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas-proc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 2d33f342a29307..e0f8329966d653 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -757,7 +757,16 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v) #define RMO_READ_BUF_MAX 30 -/* RTAS Userspace access */ +/** + * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space. + * + * Base + size description of a range of RTAS-addressable memory set + * aside for user space to use as work area(s) for certain RTAS + * functions. User space accesses this region via /dev/mem. Apart from + * security policies, the kernel does not arbitrate or serialize + * access to this region, and user space must ensure that concurrent + * users do not interfere with each other. + */ static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v) { seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_RMOBUF_MAX); From 01c1b9984a12a379f332c39c4b1fd96e473b93b0 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 8 Apr 2021 09:06:27 -0500 Subject: [PATCH 194/302] powerpc/rtas-proc: remove unused RMO_READ_BUF_MAX This constant is unused. Signed-off-by: Nathan Lynch Reviewed-by: Alexey Kardashevskiy Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210408140630.205502-3-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas-proc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index e0f8329966d653..d2b0d99824a425 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -755,8 +755,6 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v) return 0; } -#define RMO_READ_BUF_MAX 30 - /** * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space. * From 0ab1c929ae38262c4deb18b4a2e03a4f0cb5c5ed Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 8 Apr 2021 09:06:28 -0500 Subject: [PATCH 195/302] powerpc/rtas: remove ibm_suspend_me_token There's not a compelling reason to cache the value of the token for the ibm,suspend-me function. Just look it up when needed in the RTAS syscall's special case for it. 
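Looking the token up on each call is cheap: rtas_token() amounts to a device tree property lookup on the /rtas node, roughly (simplified sketch, not the exact kernel code):

	const __be32 *tokp = of_get_property(rtas.dev, "ibm,suspend-me", NULL);
	int token = tokp ? be32_to_cpu(*tokp) : RTAS_UNKNOWN_SERVICE;

so dropping the cached global costs nothing measurable on the sys_rtas() path.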
Signed-off-by: Nathan Lynch Reviewed-by: Alexey Kardashevskiy Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210408140630.205502-4-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d126d71ea5bd8f..60fcf7f7b0b88f 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -828,7 +828,6 @@ void rtas_activate_firmware(void) pr_err("ibm,activate-firmware failed (%i)\n", fwrc); } -static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; #ifdef CONFIG_PPC_PSERIES /** * rtas_call_reentrant() - Used for reentrant rtas calls @@ -1103,7 +1102,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) return -EINVAL; /* Need to handle ibm,suspend_me call specially */ - if (token == ibm_suspend_me_token) { + if (token == rtas_token("ibm,suspend-me")) { /* * rtas_ibm_suspend_me assumes the streamid handle is in cpu @@ -1191,10 +1190,8 @@ void __init rtas_initialize(void) * the stop-self token if any */ #ifdef CONFIG_PPC64 - if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR)) rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); - ibm_suspend_me_token = rtas_token("ibm,suspend-me"); - } #endif rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE, 0, rtas_region); From 0649cdc8237943c15fc977e96033dc8ae28cc2bd Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 8 Apr 2021 09:06:29 -0500 Subject: [PATCH 196/302] powerpc/rtas: move syscall filter setup into separate function Reduce conditionally compiled sections within rtas_initialize() by moving the filter table initialization into its own function already guarded by CONFIG_PPC_RTAS_FILTER. No behavior change intended. Signed-off-by: Nathan Lynch Reviewed-by: Alexey Kardashevskiy Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210408140630.205502-5-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 60fcf7f7b0b88f..24dc7bc463a82c 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1051,6 +1051,14 @@ static bool block_rtas_call(int token, int nargs, return true; } +static void __init rtas_syscall_filter_init(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) + rtas_filters[i].token = rtas_token(rtas_filters[i].name); +} + #else static bool block_rtas_call(int token, int nargs, @@ -1059,6 +1067,10 @@ static bool block_rtas_call(int token, int nargs, return false; } +static void __init rtas_syscall_filter_init(void) +{ +} + #endif /* CONFIG_PPC_RTAS_FILTER */ /* We assume to be passed big endian arguments */ @@ -1162,9 +1174,6 @@ void __init rtas_initialize(void) unsigned long rtas_region = RTAS_INSTANTIATE_MAX; u32 base, size, entry; int no_base, no_size, no_entry; -#ifdef CONFIG_PPC_RTAS_FILTER - int i; -#endif /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. 
@@ -1203,11 +1212,7 @@ void __init rtas_initialize(void) rtas_last_error_token = rtas_token("rtas-last-error"); #endif -#ifdef CONFIG_PPC_RTAS_FILTER - for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { - rtas_filters[i].token = rtas_token(rtas_filters[i].name); - } -#endif + rtas_syscall_filter_init(); } int __init early_init_dt_scan_rtas(unsigned long node, From e5d56763525e65417dad0d46572b234fa0008e40 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 8 Apr 2021 09:06:30 -0500 Subject: [PATCH 197/302] powerpc/rtas: rename RTAS_RMOBUF_MAX to RTAS_USER_REGION_SIZE RTAS_RMOBUF_MAX doesn't actually describe a "maximum" value in any sense. It represents the size of an area of memory set aside for user space to use as work areas for certain RTAS calls. Rename it to RTAS_USER_REGION_SIZE. Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210408140630.205502-6-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 6 +++--- arch/powerpc/kernel/rtas-proc.c | 2 +- arch/powerpc/kernel/rtas.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 658448ca5b8ae9..9dc97d2f9d27e0 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -19,8 +19,8 @@ #define RTAS_UNKNOWN_SERVICE (-1) #define RTAS_INSTANTIATE_MAX (1ULL<<30) /* Don't instantiate rtas at/above this value */ -/* Buffer size for ppc_rtas system call. */ -#define RTAS_RMOBUF_MAX (64 * 1024) +/* Memory set aside for sys_rtas to use with calls that need a work area. */ +#define RTAS_USER_REGION_SIZE (64 * 1024) /* RTAS return status codes */ #define RTAS_BUSY -2 /* RTAS Busy */ @@ -357,7 +357,7 @@ extern void rtas_take_timebase(void); static inline int page_is_rtas_user_buf(unsigned long pfn) { unsigned long paddr = (pfn << PAGE_SHIFT); - if (paddr >= rtas_rmo_buf && paddr < (rtas_rmo_buf + RTAS_RMOBUF_MAX)) + if (paddr >= rtas_rmo_buf && paddr < (rtas_rmo_buf + RTAS_USER_REGION_SIZE)) return 1; return 0; } diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index d2b0d99824a425..6857a5b0a1c39c 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -767,6 +767,6 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v) */ static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v) { - seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_RMOBUF_MAX); + seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_USER_REGION_SIZE); return 0; } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 24dc7bc463a82c..6bada744402b1c 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -987,10 +987,10 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { static bool in_rmo_buf(u32 base, u32 end) { return base >= rtas_rmo_buf && - base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) && + base < (rtas_rmo_buf + RTAS_USER_REGION_SIZE) && base <= end && end >= rtas_rmo_buf && - end < (rtas_rmo_buf + RTAS_RMOBUF_MAX); + end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE); } static bool block_rtas_call(int token, int nargs, @@ -1202,7 +1202,7 @@ void __init rtas_initialize(void) if (firmware_has_feature(FW_FEATURE_LPAR)) rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); #endif - rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE, + rtas_rmo_buf = memblock_phys_alloc_range(RTAS_USER_REGION_SIZE, PAGE_SIZE, 0, rtas_region); if (!rtas_rmo_buf) 
panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n", From 14b3c9d24a7a5c274a9df27d245516f466d3bc5f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 2 Mar 2021 00:30:18 +0900 Subject: [PATCH 198/302] powerpc/syscalls: switch to generic syscalltbl.sh Many architectures duplicate similar shell scripts. This commit converts powerpc to use scripts/syscalltbl.sh. This also unifies syscall_table_32.h and syscall_table_c32.h. Signed-off-by: Masahiro Yamada Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210301153019.362742-1-masahiroy@kernel.org --- arch/powerpc/include/asm/Kbuild | 1 - arch/powerpc/kernel/syscalls/Makefile | 22 +++---------- arch/powerpc/kernel/syscalls/syscalltbl.sh | 36 --------------------- arch/powerpc/kernel/systbl.S | 5 ++- arch/powerpc/platforms/cell/spu_callbacks.c | 2 +- 5 files changed, 10 insertions(+), 56 deletions(-) delete mode 100644 arch/powerpc/kernel/syscalls/syscalltbl.sh diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index e1f9b4ea1c537b..bcf95ce0964f98 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generated-y += syscall_table_32.h generated-y += syscall_table_64.h -generated-y += syscall_table_c32.h generated-y += syscall_table_spu.h generic-y += export.h generic-y += kvm_types.h diff --git a/arch/powerpc/kernel/syscalls/Makefile b/arch/powerpc/kernel/syscalls/Makefile index 9e3be295dbba26..df21c731c80627 100644 --- a/arch/powerpc/kernel/syscalls/Makefile +++ b/arch/powerpc/kernel/syscalls/Makefile @@ -7,7 +7,7 @@ _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ syscall := $(src)/syscall.tbl syshdr := $(srctree)/$(src)/syscallhdr.sh -systbl := $(srctree)/$(src)/syscalltbl.sh +systbl := $(srctree)/scripts/syscalltbl.sh quiet_cmd_syshdr = SYSHDR $@ cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \ @@ -16,10 +16,7 @@ quiet_cmd_syshdr = SYSHDR $@ '$(syshdr_offset_$(basetarget))' quiet_cmd_systbl = SYSTBL $@ - cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@' \ - '$(systbl_abis_$(basetarget))' \ - '$(systbl_abi_$(basetarget))' \ - '$(systbl_offset_$(basetarget))' + cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@ syshdr_abis_unistd_32 := common,nospu,32 $(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE @@ -29,30 +26,21 @@ syshdr_abis_unistd_64 := common,nospu,64 $(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE $(call if_changed,syshdr) -systbl_abis_syscall_table_32 := common,nospu,32 -systbl_abi_syscall_table_32 := 32 +$(kapi)/syscall_table_32.h: abis := common,nospu,32 $(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE $(call if_changed,systbl) -systbl_abis_syscall_table_64 := common,nospu,64 -systbl_abi_syscall_table_64 := 64 +$(kapi)/syscall_table_64.h: abis := common,nospu,64 $(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE $(call if_changed,systbl) -systbl_abis_syscall_table_c32 := common,nospu,32 -systbl_abi_syscall_table_c32 := c32 -$(kapi)/syscall_table_c32.h: $(syscall) $(systbl) FORCE - $(call if_changed,systbl) - -systbl_abis_syscall_table_spu := common,spu -systbl_abi_syscall_table_spu := spu +$(kapi)/syscall_table_spu.h: abis := common,spu $(kapi)/syscall_table_spu.h: $(syscall) $(systbl) FORCE $(call if_changed,systbl) uapisyshdr-y += unistd_32.h unistd_64.h kapisyshdr-y += syscall_table_32.h \ syscall_table_64.h \ - syscall_table_c32.h \ syscall_table_spu.h uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) diff --git 
a/arch/powerpc/kernel/syscalls/syscalltbl.sh b/arch/powerpc/kernel/syscalls/syscalltbl.sh deleted file mode 100644 index f7393a7b18aa0c..00000000000000 --- a/arch/powerpc/kernel/syscalls/syscalltbl.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -in="$1" -out="$2" -my_abis=`echo "($3)" | tr ',' '|'` -my_abi="$4" -offset="$5" - -emit() { - t_nxt="$1" - t_nr="$2" - t_entry="$3" - - while [ $t_nxt -lt $t_nr ]; do - printf "__SYSCALL(%s,sys_ni_syscall)\n" "${t_nxt}" - t_nxt=$((t_nxt+1)) - done - printf "__SYSCALL(%s,%s)\n" "${t_nxt}" "${t_entry}" -} - -grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | ( - nxt=0 - if [ -z "$offset" ]; then - offset=0 - fi - - while read nr abi name entry compat ; do - if [ "$my_abi" = "c32" ] && [ ! -z "$compat" ]; then - emit $((nxt+offset)) $((nr+offset)) $compat - else - emit $((nxt+offset)) $((nr+offset)) $entry - fi - nxt=$((nr+1)) - done -) > "$out" diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index d34276f3c495fe..cb3358886203e9 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -21,6 +21,7 @@ #define __SYSCALL(nr, entry) .long entry #endif +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) .globl sys_call_table sys_call_table: #ifdef CONFIG_PPC64 @@ -30,8 +31,10 @@ sys_call_table: #endif #ifdef CONFIG_COMPAT +#undef __SYSCALL_WITH_COMPAT +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat) .globl compat_sys_call_table compat_sys_call_table: #define compat_sys_sigsuspend sys_sigsuspend -#include +#include #endif diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index abdef9bcf43241..fe0d8797a00a3b 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -35,9 +35,9 @@ */ static void *spu_syscall_table[] = { +#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry) #define __SYSCALL(nr, entry) [nr] = entry, #include -#undef __SYSCALL }; long spu_sys_callback(struct spu_syscall_block *s) From 672bff581e19d5d7bef993f910ed385c4054cbbc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 2 Mar 2021 00:30:19 +0900 Subject: [PATCH 199/302] powerpc/syscalls: switch to generic syscallhdr.sh Many architectures duplicate similar shell scripts. This commit converts powerpc to use scripts/syscallhdr.sh. 
Signed-off-by: Masahiro Yamada Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210301153019.362742-2-masahiroy@kernel.org --- arch/powerpc/kernel/syscalls/Makefile | 11 +++---- arch/powerpc/kernel/syscalls/syscallhdr.sh | 36 ---------------------- 2 files changed, 4 insertions(+), 43 deletions(-) delete mode 100644 arch/powerpc/kernel/syscalls/syscallhdr.sh diff --git a/arch/powerpc/kernel/syscalls/Makefile b/arch/powerpc/kernel/syscalls/Makefile index df21c731c80627..5476f62eb80f96 100644 --- a/arch/powerpc/kernel/syscalls/Makefile +++ b/arch/powerpc/kernel/syscalls/Makefile @@ -6,23 +6,20 @@ _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') syscall := $(src)/syscall.tbl -syshdr := $(srctree)/$(src)/syscallhdr.sh +syshdr := $(srctree)/scripts/syscallhdr.sh systbl := $(srctree)/scripts/syscalltbl.sh quiet_cmd_syshdr = SYSHDR $@ - cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \ - '$(syshdr_abis_$(basetarget))' \ - '$(syshdr_pfx_$(basetarget))' \ - '$(syshdr_offset_$(basetarget))' + cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis $(abis) $< $@ quiet_cmd_systbl = SYSTBL $@ cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@ -syshdr_abis_unistd_32 := common,nospu,32 +$(uapi)/unistd_32.h: abis := common,nospu,32 $(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE $(call if_changed,syshdr) -syshdr_abis_unistd_64 := common,nospu,64 +$(uapi)/unistd_64.h: abis := common,nospu,64 $(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE $(call if_changed,syshdr) diff --git a/arch/powerpc/kernel/syscalls/syscallhdr.sh b/arch/powerpc/kernel/syscalls/syscallhdr.sh deleted file mode 100644 index 02d6751f3be36c..00000000000000 --- a/arch/powerpc/kernel/syscalls/syscallhdr.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -in="$1" -out="$2" -my_abis=`echo "($3)" | tr ',' '|'` -prefix="$4" -offset="$5" - -fileguard=_UAPI_ASM_POWERPC_`basename "$out" | sed \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \ - -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'` -grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | ( - printf "#ifndef %s\n" "${fileguard}" - printf "#define %s\n" "${fileguard}" - printf "\n" - - nxt=0 - while read nr abi name entry compat ; do - if [ -z "$offset" ]; then - printf "#define __NR_%s%s\t%s\n" \ - "${prefix}" "${name}" "${nr}" - else - printf "#define __NR_%s%s\t(%s + %s)\n" \ - "${prefix}" "${name}" "${offset}" "${nr}" - fi - nxt=$((nr+1)) - done - - printf "\n" - printf "#ifdef __KERNEL__\n" - printf "#define __NR_syscalls\t%s\n" "${nxt}" - printf "#endif\n" - printf "\n" - printf "#endif /* %s */\n" "${fileguard}" -) > "$out" From 472724111f0f72042deb6a9dcee9578e5398a1a1 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Thu, 8 Apr 2021 17:19:16 -0300 Subject: [PATCH 200/302] powerpc/iommu: Enable remaining IOMMU Pagesizes present in LoPAR According to LoPAR, ibm,query-pe-dma-window output named "IO Page Sizes" will let the OS know all possible pagesizes that can be used for creating a new DDW. Currently Linux will only try using 3 of the 8 available options: 4K, 64K and 16M. According to LoPAR, Hypervisor may also offer 32M, 64M, 128M, 256M and 16G. Enabling bigger pages would be interesting for direct mapping systems with a lot of RAM, while using less TCE entries. 
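As a rough illustration of the gain (illustrative arithmetic, not numbers from the patch): direct-mapping 1 TiB of memory through a DDW needs

	2^40 / 2^16 = 16,777,216 TCEs with 64K pages
	2^40 / 2^24 =     65,536 TCEs with 16M pages
	2^40 / 2^28 =      4,096 TCEs with 256M pages
	2^40 / 2^34 =         64 TCEs with 16G pages

so picking the largest page size offered by the hypervisor shrinks the TCE table by several orders of magnitude for the same mapped range.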
Signed-off-by: Leonardo Bras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210408201915.174217-1-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 37 +++++++++++++++++++++----- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 9fc5217f0c8e5a..67c9953a6503c6 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1099,6 +1099,33 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) ret); } +/* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */ +static int iommu_get_page_shift(u32 query_page_size) +{ + /* Supported IO page-sizes according to LoPAR */ + const int shift[] = { + __builtin_ctzll(SZ_4K), __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M), + __builtin_ctzll(SZ_32M), __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M), + __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G) + }; + + int i = ARRAY_SIZE(shift) - 1; + + /* + * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field: + * - bit 31 means 4k pages are supported, + * - bit 30 means 64k pages are supported, and so on. + * Larger pagesizes map more memory with the same amount of TCEs, so start probing them. + */ + for (; i >= 0 ; i--) { + if (query_page_size & (1 << i)) + return shift[i]; + } + + /* No valid page size found. */ + return 0; +} + /* * If the PE supports dynamic dma windows, and there is space for a table * that can map all pages in a linear offset, then setup such a table, @@ -1206,13 +1233,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) goto out_failed; } } - if (query.page_size & 4) { - page_shift = 24; /* 16MB */ - } else if (query.page_size & 2) { - page_shift = 16; /* 64kB */ - } else if (query.page_size & 1) { - page_shift = 12; /* 4kB */ - } else { + + page_shift = iommu_get_page_shift(query.page_size); + if (!page_shift) { dev_dbg(&dev->dev, "no supported direct page size in mask %x", query.page_size); goto out_failed; From 193e4cd8ed9dd01092d01df7706a6b344c946af4 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Fri, 9 Apr 2021 17:01:09 +0800 Subject: [PATCH 201/302] powerpc/pseries: Make symbol '__pcpu_scope_hcall_stats' static The sparse tool complains as follows: arch/powerpc/platforms/pseries/hvCall_inst.c:29:1: warning: symbol '__pcpu_scope_hcall_stats' was not declared. Should it be static? This symbol is not used outside of hvCall_inst.c, so this commit marks it static. 
Reported-by: Hulk Robot Signed-off-by: Bixuan Cui Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210409090109.59347-1-cuibixuan@huawei.com --- arch/powerpc/platforms/pseries/hvCall_inst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 2c59b4986ea540..3a50612a78db8b 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -26,7 +26,7 @@ struct hcall_stats { }; #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) -DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); +static DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); /* * Routines for displaying the statistics in debugfs From 2235dea17d56238642121a8085b71d68598534bb Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Fri, 9 Apr 2021 17:01:14 +0800 Subject: [PATCH 202/302] powerpc/pseries/pmem: Make symbol 'drc_pmem_match' static The sparse tool complains as follows: arch/powerpc/platforms/pseries/pmem.c:142:27: warning: symbol 'drc_pmem_match' was not declared. Should it be static? This symbol is not used outside of pmem.c, so this commit marks it static. Reported-by: Hulk Robot Signed-off-by: Bixuan Cui Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210409090114.59396-1-cuibixuan@huawei.com --- arch/powerpc/platforms/pseries/pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c index e1dc5d3254df9c..439ac72c247088 100644 --- a/arch/powerpc/platforms/pseries/pmem.c +++ b/arch/powerpc/platforms/pseries/pmem.c @@ -139,7 +139,7 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) return rc; } -const struct of_device_id drc_pmem_match[] = { +static const struct of_device_id drc_pmem_match[] = { { .type = "ibm,persistent-memory", }, {} }; From 107dadb046178173dea18e0a78ff8ea3cc27c213 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Fri, 9 Apr 2021 17:01:19 +0800 Subject: [PATCH 203/302] powerpc/perf: Make symbol 'isa207_pmu_format_attr' static The sparse tool complains as follows: arch/powerpc/perf/isa207-common.c:24:18: warning: symbol 'isa207_pmu_format_attr' was not declared. Should it be static? This symbol is not used outside of isa207-common.c, so this commit marks it static. Reported-by: Hulk Robot Signed-off-by: Bixuan Cui Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210409090119.59444-1-cuibixuan@huawei.com --- arch/powerpc/perf/isa207-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 8b5eeb6fb2fb3b..48b2d9a5096ca2 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -21,7 +21,7 @@ PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); PMU_FORMAT_ATTR(thresh_start, "config:36-39"); PMU_FORMAT_ATTR(thresh_cmp, "config:40-49"); -struct attribute *isa207_pmu_format_attr[] = { +static struct attribute *isa207_pmu_format_attr[] = { &format_attr_event.attr, &format_attr_pmcxsel.attr, &format_attr_mark.attr, From cc331eee03eadd750af1fb957d020b3f24e5e056 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Fri, 9 Apr 2021 17:01:24 +0800 Subject: [PATCH 204/302] powerpc/perf/hv-24x7: Make some symbols static The sparse tool complains as follows: arch/powerpc/perf/hv-24x7.c:229:1: warning: symbol '__pcpu_scope_hv_24x7_txn_flags' was not declared. 
Should it be static? arch/powerpc/perf/hv-24x7.c:230:1: warning: symbol '__pcpu_scope_hv_24x7_txn_err' was not declared. Should it be static? arch/powerpc/perf/hv-24x7.c:236:1: warning: symbol '__pcpu_scope_hv_24x7_hw' was not declared. Should it be static? arch/powerpc/perf/hv-24x7.c:244:1: warning: symbol '__pcpu_scope_hv_24x7_reqb' was not declared. Should it be static? arch/powerpc/perf/hv-24x7.c:245:1: warning: symbol '__pcpu_scope_hv_24x7_resb' was not declared. Should it be static? This symbol is not used outside of hv-24x7.c, so this commit marks it static. Reported-by: Hulk Robot Signed-off-by: Bixuan Cui Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210409090124.59492-1-cuibixuan@huawei.com --- arch/powerpc/perf/hv-24x7.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index e5eb33255066c4..1816f560a46523 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -226,14 +226,14 @@ static struct attribute_group event_long_desc_group = { static struct kmem_cache *hv_page_cache; -DEFINE_PER_CPU(int, hv_24x7_txn_flags); -DEFINE_PER_CPU(int, hv_24x7_txn_err); +static DEFINE_PER_CPU(int, hv_24x7_txn_flags); +static DEFINE_PER_CPU(int, hv_24x7_txn_err); struct hv_24x7_hw { struct perf_event *events[255]; }; -DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); +static DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); /* * request_buffer and result_buffer are not required to be 4k aligned, @@ -241,8 +241,8 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); * the simplest way to ensure that. */ #define H24x7_DATA_BUFFER_SIZE 4096 -DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); -DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); +static DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); +static DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); static unsigned int max_num_requests(int interface_version) { From f234ad405a35262ed2d8dd2d29fc633908dce955 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Fri, 9 Apr 2021 15:01:51 +0800 Subject: [PATCH 205/302] powerpc/xmon: Make symbol 'spu_inst_dump' static Fix sparse warning: arch/powerpc/xmon/xmon.c:4216:1: warning: symbol 'spu_inst_dump' was not declared. Should it be static? This symbol is not used outside of xmon.c, so make it static. Signed-off-by: Pu Lehui Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210409070151.163424-1-pulehui@huawei.com --- arch/powerpc/xmon/xmon.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 80fbf8968f778b..2e94647c87118b 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -4188,8 +4188,7 @@ static void dump_spu_fields(struct spu *spu) DUMP_FIELD(spu, "0x%p", pdata); } -int -spu_inst_dump(unsigned long adr, long count, int praddr) +static int spu_inst_dump(unsigned long adr, long count, int praddr) { return generic_inst_dump(adr, count, praddr, print_insn_spu); } From ff0b4155ae9903539d1299a9a4c8717fb7eb6009 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Fri, 9 Apr 2021 14:38:55 +0800 Subject: [PATCH 206/302] powerpc/powernv: make symbol 'mpipl_kobj' static The sparse tool complains as follows: arch/powerpc/platforms/powernv/opal-core.c:74:16: warning: symbol 'mpipl_kobj' was not declared. This symbol is not used outside of opal-core.c, so marks it static. 
Reported-by: Hulk Robot Signed-off-by: Bixuan Cui Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210409063855.57347-1-cuibixuan@huawei.com --- arch/powerpc/platforms/powernv/opal-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index 0d9ba70f72517e..5b9736bbc2aa37 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -71,7 +71,7 @@ static LIST_HEAD(opalcore_list); static struct opalcore_config *oc_conf; static const struct opal_mpipl_fadump *opalc_metadata; static const struct opal_mpipl_fadump *opalc_cpu_metadata; -struct kobject *mpipl_kobj; +static struct kobject *mpipl_kobj; /* * Set crashing CPU's signal to SIGUSR1. if the kernel is triggered From b26e8f27253a47bff90972b987112fd8396e9b8d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:24 +0000 Subject: [PATCH 207/302] powerpc/mem: Move cache flushing functions into mm/cacheflush.c Cache flushing functions are in the middle of completely unrelated stuff in mm/mem.c Create a dedicated mm/cacheflush.c for those functions. Also cleanup the list of included headers. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7bf6f1600acad146e541a4e220940062f2e5b03d.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/Makefile | 3 +- arch/powerpc/mm/cacheflush.c | 255 +++++++++++++++++++++++++++++++ arch/powerpc/mm/mem.c | 281 ----------------------------------- 3 files changed, 257 insertions(+), 282 deletions(-) create mode 100644 arch/powerpc/mm/cacheflush.c diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 3b4e9e4e25eaa2..c3df3a8501d480 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -8,7 +8,8 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ - init-common.o mmu_context.o drmem.o + init-common.o mmu_context.o drmem.o \ + cacheflush.o obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/ obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/ obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/ diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c new file mode 100644 index 00000000000000..40613d2fda378a --- /dev/null +++ b/arch/powerpc/mm/cacheflush.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +/** + * flush_coherent_icache() - if a CPU has a coherent icache, flush it + * @addr: The base address to use (can be any valid address, the whole cache will be flushed) + * Return true if the cache was flushed, false otherwise + */ +static inline bool flush_coherent_icache(unsigned long addr) +{ + /* + * For a snooping icache, we still need a dummy icbi to purge all the + * prefetched instructions from the ifetch buffers. We also need a sync + * before the icbi to order the the actual stores to memory that might + * have modified instructions with the icbi. 
+ */ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { + mb(); /* sync */ + allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES); + icbi((void *)addr); + prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES); + mb(); /* sync */ + isync(); + return true; + } + + return false; +} + +/** + * invalidate_icache_range() - Flush the icache by issuing icbi across an address range + * @start: the start address + * @stop: the stop address (exclusive) + */ +static void invalidate_icache_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_icache_shift(); + unsigned long bytes = l1_icache_bytes(); + char *addr = (char *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + icbi(addr); + + mb(); /* sync */ + isync(); +} + +/** + * flush_icache_range: Write any modified data cache blocks out to memory + * and invalidate the corresponding blocks in the instruction cache + * + * Generic code will call this after writing memory, before executing from it. + * + * @start: the start address + * @stop: the stop address (exclusive) + */ +void flush_icache_range(unsigned long start, unsigned long stop) +{ + if (flush_coherent_icache(start)) + return; + + clean_dcache_range(start, stop); + + if (IS_ENABLED(CONFIG_44x)) { + /* + * Flash invalidate on 44x because we are passed kmapped + * addresses and this doesn't work for userspace pages due to + * the virtually tagged icache. + */ + iccci((void *)start); + mb(); /* sync */ + isync(); + } else + invalidate_icache_range(start, stop); +} +EXPORT_SYMBOL(flush_icache_range); + +#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) +/** + * flush_dcache_icache_phys() - Flush a page by it's physical address + * @physaddr: the physical address of the page + */ +static void flush_dcache_icache_phys(unsigned long physaddr) +{ + unsigned long bytes = l1_dcache_bytes(); + unsigned long nb = PAGE_SIZE / bytes; + unsigned long addr = physaddr & PAGE_MASK; + unsigned long msr, msr0; + unsigned long loop1 = addr, loop2 = addr; + + msr0 = mfmsr(); + msr = msr0 & ~MSR_DR; + /* + * This must remain as ASM to prevent potential memory accesses + * while the data MMU is disabled + */ + asm volatile( + " mtctr %2;\n" + " mtmsr %3;\n" + " isync;\n" + "0: dcbst 0, %0;\n" + " addi %0, %0, %4;\n" + " bdnz 0b;\n" + " sync;\n" + " mtctr %2;\n" + "1: icbi 0, %1;\n" + " addi %1, %1, %4;\n" + " bdnz 1b;\n" + " sync;\n" + " mtmsr %5;\n" + " isync;\n" + : "+&r" (loop1), "+&r" (loop2) + : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) + : "ctr", "memory"); +} +NOKPROBE_SYMBOL(flush_dcache_icache_phys) +#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) + +/* + * This is called when a page has been modified by the kernel. + * It just marks the page as not i-cache clean. We do the i-cache + * flush later when the page is given to a user process, if necessary. 
+ */ +void flush_dcache_page(struct page *page) +{ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* avoid an atomic op if possible */ + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); +} +EXPORT_SYMBOL(flush_dcache_page); + +static void flush_dcache_icache_hugepage(struct page *page) +{ + int i; + void *start; + + BUG_ON(!PageCompound(page)); + + for (i = 0; i < compound_nr(page); i++) { + if (!PageHighMem(page)) { + __flush_dcache_icache(page_address(page+i)); + } else { + start = kmap_atomic(page+i); + __flush_dcache_icache(start); + kunmap_atomic(start); + } + } +} + +void flush_dcache_icache_page(struct page *page) +{ + + if (PageCompound(page)) + return flush_dcache_icache_hugepage(page); + +#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) + /* On 8xx there is no need to kmap since highmem is not supported */ + __flush_dcache_icache(page_address(page)); +#else + if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { + void *start = kmap_atomic(page); + __flush_dcache_icache(start); + kunmap_atomic(start); + } else { + unsigned long addr = page_to_pfn(page) << PAGE_SHIFT; + + if (flush_coherent_icache(addr)) + return; + flush_dcache_icache_phys(addr); + } +#endif +} +EXPORT_SYMBOL(flush_dcache_icache_page); + +/** + * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * @page: the address of the page to flush + */ +void __flush_dcache_icache(void *p) +{ + unsigned long addr = (unsigned long)p; + + if (flush_coherent_icache(addr)) + return; + + clean_dcache_range(addr, addr + PAGE_SIZE); + + /* + * We don't flush the icache on 44x. Those have a virtual icache and we + * don't have access to the virtual address here (it's not the page + * vaddr but where it's mapped in user space). The flushing of the + * icache on these is handled elsewhere, when a change in the address + * space occurs, before returning to user space. + */ + + if (mmu_has_feature(MMU_FTR_TYPE_44x)) + return; + + invalidate_icache_range(addr, addr + PAGE_SIZE); +} + +void clear_user_page(void *page, unsigned long vaddr, struct page *pg) +{ + clear_page(page); + + /* + * We shouldn't have to do this, but some versions of glibc + * require it (ld.so assumes zero filled pages are icache clean) + * - Anton + */ + flush_dcache_page(pg); +} +EXPORT_SYMBOL(clear_user_page); + +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *pg) +{ + copy_page(vto, vfrom); + + /* + * We should be able to use the following optimisation, however + * there are two problems. + * Firstly a bug in some versions of binutils meant PLT sections + * were not marked executable. + * Secondly the first word in the GOT section is blrl, used + * to establish the GOT address. Until recently the GOT was + * not marked executable. 
+ * - Anton + */ +#if 0 + if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) + return; +#endif + + flush_dcache_page(pg); +} + +void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + unsigned long maddr; + + maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); + flush_icache_range(maddr, maddr + len); + kunmap(page); +} diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 7a59a5c9aa5dc9..6564b4d8132451 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -12,45 +12,15 @@ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include -#include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include #include #include #include -#include #include @@ -340,257 +310,6 @@ void free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } -/** - * flush_coherent_icache() - if a CPU has a coherent icache, flush it - * @addr: The base address to use (can be any valid address, the whole cache will be flushed) - * Return true if the cache was flushed, false otherwise - */ -static inline bool flush_coherent_icache(unsigned long addr) -{ - /* - * For a snooping icache, we still need a dummy icbi to purge all the - * prefetched instructions from the ifetch buffers. We also need a sync - * before the icbi to order the the actual stores to memory that might - * have modified instructions with the icbi. - */ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { - mb(); /* sync */ - allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES); - icbi((void *)addr); - prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES); - mb(); /* sync */ - isync(); - return true; - } - - return false; -} - -/** - * invalidate_icache_range() - Flush the icache by issuing icbi across an address range - * @start: the start address - * @stop: the stop address (exclusive) - */ -static void invalidate_icache_range(unsigned long start, unsigned long stop) -{ - unsigned long shift = l1_icache_shift(); - unsigned long bytes = l1_icache_bytes(); - char *addr = (char *)(start & ~(bytes - 1)); - unsigned long size = stop - (unsigned long)addr + (bytes - 1); - unsigned long i; - - for (i = 0; i < size >> shift; i++, addr += bytes) - icbi(addr); - - mb(); /* sync */ - isync(); -} - -/** - * flush_icache_range: Write any modified data cache blocks out to memory - * and invalidate the corresponding blocks in the instruction cache - * - * Generic code will call this after writing memory, before executing from it. - * - * @start: the start address - * @stop: the stop address (exclusive) - */ -void flush_icache_range(unsigned long start, unsigned long stop) -{ - if (flush_coherent_icache(start)) - return; - - clean_dcache_range(start, stop); - - if (IS_ENABLED(CONFIG_44x)) { - /* - * Flash invalidate on 44x because we are passed kmapped - * addresses and this doesn't work for userspace pages due to - * the virtually tagged icache. 
- */ - iccci((void *)start); - mb(); /* sync */ - isync(); - } else - invalidate_icache_range(start, stop); -} -EXPORT_SYMBOL(flush_icache_range); - -#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) -/** - * flush_dcache_icache_phys() - Flush a page by it's physical address - * @physaddr: the physical address of the page - */ -static void flush_dcache_icache_phys(unsigned long physaddr) -{ - unsigned long bytes = l1_dcache_bytes(); - unsigned long nb = PAGE_SIZE / bytes; - unsigned long addr = physaddr & PAGE_MASK; - unsigned long msr, msr0; - unsigned long loop1 = addr, loop2 = addr; - - msr0 = mfmsr(); - msr = msr0 & ~MSR_DR; - /* - * This must remain as ASM to prevent potential memory accesses - * while the data MMU is disabled - */ - asm volatile( - " mtctr %2;\n" - " mtmsr %3;\n" - " isync;\n" - "0: dcbst 0, %0;\n" - " addi %0, %0, %4;\n" - " bdnz 0b;\n" - " sync;\n" - " mtctr %2;\n" - "1: icbi 0, %1;\n" - " addi %1, %1, %4;\n" - " bdnz 1b;\n" - " sync;\n" - " mtmsr %5;\n" - " isync;\n" - : "+&r" (loop1), "+&r" (loop2) - : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) - : "ctr", "memory"); -} -NOKPROBE_SYMBOL(flush_dcache_icache_phys) -#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) - -/* - * This is called when a page has been modified by the kernel. - * It just marks the page as not i-cache clean. We do the i-cache - * flush later when the page is given to a user process, if necessary. - */ -void flush_dcache_page(struct page *page) -{ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - return; - /* avoid an atomic op if possible */ - if (test_bit(PG_dcache_clean, &page->flags)) - clear_bit(PG_dcache_clean, &page->flags); -} -EXPORT_SYMBOL(flush_dcache_page); - -static void flush_dcache_icache_hugepage(struct page *page) -{ - int i; - void *start; - - BUG_ON(!PageCompound(page)); - - for (i = 0; i < compound_nr(page); i++) { - if (!PageHighMem(page)) { - __flush_dcache_icache(page_address(page+i)); - } else { - start = kmap_atomic(page+i); - __flush_dcache_icache(start); - kunmap_atomic(start); - } - } -} - -void flush_dcache_icache_page(struct page *page) -{ - - if (PageCompound(page)) - return flush_dcache_icache_hugepage(page); - -#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) - /* On 8xx there is no need to kmap since highmem is not supported */ - __flush_dcache_icache(page_address(page)); -#else - if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { - void *start = kmap_atomic(page); - __flush_dcache_icache(start); - kunmap_atomic(start); - } else { - unsigned long addr = page_to_pfn(page) << PAGE_SHIFT; - - if (flush_coherent_icache(addr)) - return; - flush_dcache_icache_phys(addr); - } -#endif -} -EXPORT_SYMBOL(flush_dcache_icache_page); - -/** - * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. - * - * @page: the address of the page to flush - */ -void __flush_dcache_icache(void *p) -{ - unsigned long addr = (unsigned long)p; - - if (flush_coherent_icache(addr)) - return; - - clean_dcache_range(addr, addr + PAGE_SIZE); - - /* - * We don't flush the icache on 44x. Those have a virtual icache and we - * don't have access to the virtual address here (it's not the page - * vaddr but where it's mapped in user space). The flushing of the - * icache on these is handled elsewhere, when a change in the address - * space occurs, before returning to user space. 
- */ - - if (mmu_has_feature(MMU_FTR_TYPE_44x)) - return; - - invalidate_icache_range(addr, addr + PAGE_SIZE); -} - -void clear_user_page(void *page, unsigned long vaddr, struct page *pg) -{ - clear_page(page); - - /* - * We shouldn't have to do this, but some versions of glibc - * require it (ld.so assumes zero filled pages are icache clean) - * - Anton - */ - flush_dcache_page(pg); -} -EXPORT_SYMBOL(clear_user_page); - -void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, - struct page *pg) -{ - copy_page(vto, vfrom); - - /* - * We should be able to use the following optimisation, however - * there are two problems. - * Firstly a bug in some versions of binutils meant PLT sections - * were not marked executable. - * Secondly the first word in the GOT section is blrl, used - * to establish the GOT address. Until recently the GOT was - * not marked executable. - * - Anton - */ -#if 0 - if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) - return; -#endif - - flush_dcache_page(pg); -} - -void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, - unsigned long addr, int len) -{ - unsigned long maddr; - - maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); - flush_icache_range(maddr, maddr + len); - kunmap(page); -} - /* * System memory should not be in /proc/iomem but various tools expect it * (eg kdump). From bf26e0bbd2f82b52605cd7c880245eefe67e09f3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:26 +0000 Subject: [PATCH 208/302] powerpc/mem: Declare __flush_dcache_icache() static __flush_dcache_icache() is only used in mem.c. Move it before the functions that use it and declare it static. And also fix the name of the parameter in the comment. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3fa903eb5a10b2bc7d99a8c559ffdaa05452d8e0.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cacheflush.h | 1 - arch/powerpc/mm/cacheflush.c | 60 +++++++++++++-------------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index f63495109f6342..9110489ea411fe 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -40,7 +40,6 @@ void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, #define flush_icache_user_page flush_icache_user_page void flush_dcache_icache_page(struct page *page); -void __flush_dcache_icache(void *page); /** * flush_dcache_range(): Write any modified data cache blocks out to memory and diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index 40613d2fda378a..742d3e0fb12f32 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -135,6 +135,36 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); +/** + * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * @p: the address of the page to flush + */ +static void __flush_dcache_icache(void *p) +{ + unsigned long addr = (unsigned long)p; + + if (flush_coherent_icache(addr)) + return; + + clean_dcache_range(addr, addr + PAGE_SIZE); + + /* + * We don't flush the icache on 44x. Those have a virtual icache and we + * don't have access to the virtual address here (it's not the page + * vaddr but where it's mapped in user space). 
The flushing of the + * icache on these is handled elsewhere, when a change in the address + * space occurs, before returning to user space. + */ + + if (mmu_has_feature(MMU_FTR_TYPE_44x)) + return; + + invalidate_icache_range(addr, addr + PAGE_SIZE); +} + static void flush_dcache_icache_hugepage(struct page *page) { int i; @@ -178,36 +208,6 @@ void flush_dcache_icache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_icache_page); -/** - * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. - * - * @page: the address of the page to flush - */ -void __flush_dcache_icache(void *p) -{ - unsigned long addr = (unsigned long)p; - - if (flush_coherent_icache(addr)) - return; - - clean_dcache_range(addr, addr + PAGE_SIZE); - - /* - * We don't flush the icache on 44x. Those have a virtual icache and we - * don't have access to the virtual address here (it's not the page - * vaddr but where it's mapped in user space). The flushing of the - * icache on these is handled elsewhere, when a change in the address - * space occurs, before returning to user space. - */ - - if (mmu_has_feature(MMU_FTR_TYPE_44x)) - return; - - invalidate_icache_range(addr, addr + PAGE_SIZE); -} - void clear_user_page(void *page, unsigned long vaddr, struct page *pg) { clear_page(page); From 131637a17dc97fde3d007ab224e30c7ff4e62f6e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:27 +0000 Subject: [PATCH 209/302] powerpc/mem: Remove address argument to flush_coherent_icache() flush_coherent_icache() can use any valid address as mentionned by the comment. Use PAGE_OFFSET as base address. This allows removing the user access stuff. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/742b6360ae4f344a1c6ecfadcf3b6645f443fa7a.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/cacheflush.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index 742d3e0fb12f32..dc2d39da6f6306 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -5,10 +5,9 @@ /** * flush_coherent_icache() - if a CPU has a coherent icache, flush it - * @addr: The base address to use (can be any valid address, the whole cache will be flushed) * Return true if the cache was flushed, false otherwise */ -static inline bool flush_coherent_icache(unsigned long addr) +static inline bool flush_coherent_icache(void) { /* * For a snooping icache, we still need a dummy icbi to purge all the @@ -18,9 +17,7 @@ static inline bool flush_coherent_icache(unsigned long addr) */ if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { mb(); /* sync */ - allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES); - icbi((void *)addr); - prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES); + icbi((void *)PAGE_OFFSET); mb(); /* sync */ isync(); return true; @@ -60,7 +57,7 @@ static void invalidate_icache_range(unsigned long start, unsigned long stop) */ void flush_icache_range(unsigned long start, unsigned long stop) { - if (flush_coherent_icache(start)) + if (flush_coherent_icache()) return; clean_dcache_range(start, stop); @@ -146,7 +143,7 @@ static void __flush_dcache_icache(void *p) { unsigned long addr = (unsigned long)p; - if (flush_coherent_icache(addr)) + if (flush_coherent_icache()) return; clean_dcache_range(addr, addr + PAGE_SIZE); @@ -200,7 
+197,7 @@ void flush_dcache_icache_page(struct page *page) } else { unsigned long addr = page_to_pfn(page) << PAGE_SHIFT; - if (flush_coherent_icache(addr)) + if (flush_coherent_icache()) return; flush_dcache_icache_phys(addr); } From e618c7aea1f2a2d615a99948f1f5cb4c11b6bf57 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:28 +0000 Subject: [PATCH 210/302] powerpc/mem: Call flush_coherent_icache() at higher level flush_coherent_icache() doesn't need the address anymore, so it can be called immediately when entering the public functions and doesn't need to be disseminated among lower level functions. And use page_to_phys() instead of open coding the calculation of phys address to call flush_dcache_icache_phys(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5f063986e325d2efdd404b8f8c5f4bcbd4eb11a6.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/cacheflush.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index dc2d39da6f6306..811045c50d82f3 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -143,9 +143,6 @@ static void __flush_dcache_icache(void *p) { unsigned long addr = (unsigned long)p; - if (flush_coherent_icache()) - return; - clean_dcache_range(addr, addr + PAGE_SIZE); /* @@ -182,6 +179,8 @@ static void flush_dcache_icache_hugepage(struct page *page) void flush_dcache_icache_page(struct page *page) { + if (flush_coherent_icache()) + return; if (PageCompound(page)) return flush_dcache_icache_hugepage(page); @@ -195,11 +194,7 @@ void flush_dcache_icache_page(struct page *page) __flush_dcache_icache(start); kunmap_atomic(start); } else { - unsigned long addr = page_to_pfn(page) << PAGE_SHIFT; - - if (flush_coherent_icache()) - return; - flush_dcache_icache_phys(addr); + flush_dcache_icache_phys(page_to_phys(page)); } #endif } From cd97d9e8b5aa45a7f867a10e99f1d6ce0a5deb8b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:29 +0000 Subject: [PATCH 211/302] powerpc/mem: Optimise flush_dcache_icache_hugepage() flush_dcache_icache_hugepage() is a static function, with only one caller. That caller calls it when PageCompound() is true, so bugging on !PageCompound() is useless if we can trust the compiler a little. Remove the BUG_ON(!PageCompound()). The number of elements of a page won't change over time, but GCC doesn't know about it, so it gets the value at every iteration. To avoid that, call compound_nr() outside the loop and save it in a local variable. Whether the page is a HIGHMEM page or not doesn't change over time. But GCC doesn't know it so it does the test on every iteration. Do the test outside the loop. When the page is not a HIGHMEM page, page_address() will fallback on lowmem_page_address(), so call lowmem_page_address() directly and don't suffer the call to page_address() on every iteration. 
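The hand-hoisting described above is easier to see in isolation. Below is a minimal, illustrative userspace sketch, not the kernel code: struct toy_page and the toy_*() / flush_one*() helpers are made-up stand-ins for a compound page, compound_nr() and PageHighMem(). It only shows the before/after shape of the loop.

/* toy_hoist.c - illustrative only; build with: cc -O2 -o toy_hoist toy_hoist.c */
#include <stdio.h>

struct toy_page {
	int nr_subpages;
	int highmem;
};

/* Stand-ins for compound_nr()/PageHighMem(); in the kernel these are not
 * trivially provable loop-invariant to the compiler. */
static int toy_nr_subpages(const struct toy_page *p) { return p->nr_subpages; }
static int toy_is_highmem(const struct toy_page *p) { return p->highmem; }

static void flush_one(int idx)         { printf("flush subpage %d\n", idx); }
static void flush_one_highmem(int idx) { printf("map+flush subpage %d\n", idx); }

/* Before: the count and the highmem test are written inside the loop,
 * matching the shape of the old code. */
static void flush_all_naive(const struct toy_page *p)
{
	int i;

	for (i = 0; i < toy_nr_subpages(p); i++) {
		if (!toy_is_highmem(p))
			flush_one(i);
		else
			flush_one_highmem(i);
	}
}

/* After: the count is read once into a local and the invariant test is
 * moved out of the loop, matching the shape of the patched function. */
static void flush_all_hoisted(const struct toy_page *p)
{
	int nr = toy_nr_subpages(p);
	int i;

	if (!toy_is_highmem(p)) {
		for (i = 0; i < nr; i++)
			flush_one(i);
	} else {
		for (i = 0; i < nr; i++)
			flush_one_highmem(i);
	}
}

int main(void)
{
	struct toy_page p = { .nr_subpages = 4, .highmem = 0 };

	flush_all_naive(&p);
	flush_all_hoisted(&p);
	return 0;
}

The second form is the same loop-unswitching the patch applies by hand to flush_dcache_icache_hugepage(): the element count is fetched once and the invariant highmem test no longer sits inside the loop body.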
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ab03712b70105fccfceef095aa03007de9295a40.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/cacheflush.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index 811045c50d82f3..3268a3e55c3f7a 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -162,14 +162,14 @@ static void __flush_dcache_icache(void *p) static void flush_dcache_icache_hugepage(struct page *page) { int i; + int nr = compound_nr(page); void *start; - BUG_ON(!PageCompound(page)); - - for (i = 0; i < compound_nr(page); i++) { - if (!PageHighMem(page)) { - __flush_dcache_icache(page_address(page+i)); - } else { + if (!PageHighMem(page)) { + for (i = 0; i < nr; i++) + __flush_dcache_icache(lowmem_page_address(page + i)); + } else { + for (i = 0; i < nr; i++) { start = kmap_atomic(page+i); __flush_dcache_icache(start); kunmap_atomic(start); From 52d490437ffb1bab0a63ab7b1a64514d8c17dd4d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:30 +0000 Subject: [PATCH 212/302] powerpc/mem: flush_dcache_icache_phys() is for HIGHMEM pages only __flush_dcache_icache() is usable for non HIGHMEM pages on every platform. It is only for HIGHMEM pages that BOOKE needs kmap() and BOOK3S needs flush_dcache_icache_phys(). So make flush_dcache_icache_phys() dependent on CONFIG_HIGHMEM and call it only when it is a HIGHMEM page. We could make flush_dcache_icache_phys() available at all time, but as it is declared NOKPROBE_SYMBOL(), GCC doesn't optimise it out when it is not used. So define a stub for !CONFIG_HIGHMEM in order to remove the #ifdef in flush_dcache_icache_page() and use IS_ENABLED() instead. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/79ed5d7914f497cd5fcd681ca2f4d50a91719455.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/cacheflush.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index 3268a3e55c3f7a..2d92cb6bc42337 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -76,7 +76,7 @@ void flush_icache_range(unsigned long start, unsigned long stop) } EXPORT_SYMBOL(flush_icache_range); -#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) +#ifdef CONFIG_HIGHMEM /** * flush_dcache_icache_phys() - Flush a page by it's physical address * @physaddr: the physical address of the page @@ -115,7 +115,11 @@ static void flush_dcache_icache_phys(unsigned long physaddr) : "ctr", "memory"); } NOKPROBE_SYMBOL(flush_dcache_icache_phys) -#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) +#else +static void flush_dcache_icache_phys(unsigned long physaddr) +{ +} +#endif /* * This is called when a page has been modified by the kernel. 
@@ -185,18 +189,15 @@ void flush_dcache_icache_page(struct page *page) if (PageCompound(page)) return flush_dcache_icache_hugepage(page); -#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) - /* On 8xx there is no need to kmap since highmem is not supported */ - __flush_dcache_icache(page_address(page)); -#else - if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { + if (!PageHighMem(page)) { + __flush_dcache_icache(lowmem_page_address(page)); + } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { void *start = kmap_atomic(page); __flush_dcache_icache(start); kunmap_atomic(start); } else { flush_dcache_icache_phys(page_to_phys(page)); } -#endif } EXPORT_SYMBOL(flush_dcache_icache_page); From 67b8e6af191a6ed717be548307eb15048f8181d8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:31 +0000 Subject: [PATCH 213/302] powerpc/mem: Help GCC realise __flush_dcache_icache() flushes single pages 'And' the given page address with PAGE_MASK to help GCC. With the patch: 00000024 <__flush_dcache_icache>: 24: 54 63 00 26 rlwinm r3,r3,0,0,19 28: 39 40 00 40 li r10,64 2c: 7c 69 1b 78 mr r9,r3 30: 7d 49 03 a6 mtctr r10 34: 7c 00 48 6c dcbst 0,r9 38: 39 29 00 20 addi r9,r9,32 3c: 7c 00 48 6c dcbst 0,r9 40: 39 29 00 20 addi r9,r9,32 44: 42 00 ff f0 bdnz 34 <__flush_dcache_icache+0x10> 48: 7c 00 04 ac hwsync 4c: 39 20 00 40 li r9,64 50: 7d 29 03 a6 mtctr r9 54: 7c 00 1f ac icbi 0,r3 58: 38 63 00 20 addi r3,r3,32 5c: 7c 00 1f ac icbi 0,r3 60: 38 63 00 20 addi r3,r3,32 64: 42 00 ff f0 bdnz 54 <__flush_dcache_icache+0x30> 68: 7c 00 04 ac hwsync 6c: 4c 00 01 2c isync 70: 4e 80 00 20 blr Without the patch: 00000024 <__flush_dcache_icache>: 24: 54 6a 00 34 rlwinm r10,r3,0,0,26 28: 39 23 10 1f addi r9,r3,4127 2c: 7d 2a 48 50 subf r9,r10,r9 30: 55 29 d9 7f rlwinm. r9,r9,27,5,31 34: 41 82 00 94 beq c8 <__flush_dcache_icache+0xa4> 38: 71 28 00 01 andi. r8,r9,1 3c: 38 c9 ff ff addi r6,r9,-1 40: 7d 48 53 78 mr r8,r10 44: 7d 27 4b 78 mr r7,r9 48: 40 82 00 6c bne b4 <__flush_dcache_icache+0x90> 4c: 54 e7 f8 7e rlwinm r7,r7,31,1,31 50: 7c e9 03 a6 mtctr r7 54: 7c 00 40 6c dcbst 0,r8 58: 39 08 00 20 addi r8,r8,32 5c: 7c 00 40 6c dcbst 0,r8 60: 39 08 00 20 addi r8,r8,32 64: 42 00 ff f0 bdnz 54 <__flush_dcache_icache+0x30> 68: 7c 00 04 ac hwsync 6c: 71 28 00 01 andi. 
r8,r9,1 70: 39 09 ff ff addi r8,r9,-1 74: 40 82 00 2c bne a0 <__flush_dcache_icache+0x7c> 78: 55 29 f8 7e rlwinm r9,r9,31,1,31 7c: 7d 29 03 a6 mtctr r9 80: 7c 00 57 ac icbi 0,r10 84: 39 4a 00 20 addi r10,r10,32 88: 7c 00 57 ac icbi 0,r10 8c: 39 4a 00 20 addi r10,r10,32 90: 42 00 ff f0 bdnz 80 <__flush_dcache_icache+0x5c> 94: 7c 00 04 ac hwsync 98: 4c 00 01 2c isync 9c: 4e 80 00 20 blr a0: 7c 00 57 ac icbi 0,r10 a4: 2c 08 00 00 cmpwi r8,0 a8: 39 4a 00 20 addi r10,r10,32 ac: 40 82 ff cc bne 78 <__flush_dcache_icache+0x54> b0: 4b ff ff e4 b 94 <__flush_dcache_icache+0x70> b4: 7c 00 50 6c dcbst 0,r10 b8: 2c 06 00 00 cmpwi r6,0 bc: 39 0a 00 20 addi r8,r10,32 c0: 40 82 ff 8c bne 4c <__flush_dcache_icache+0x28> c4: 4b ff ff a4 b 68 <__flush_dcache_icache+0x44> c8: 7c 00 04 ac hwsync cc: 7c 00 04 ac hwsync d0: 4c 00 01 2c isync d4: 4e 80 00 20 blr Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/23030822ea5cd0a122948b10226abe56602dc027.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/cacheflush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index 2d92cb6bc42337..abeef69ed4e407 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -145,7 +145,7 @@ EXPORT_SYMBOL(flush_dcache_page); */ static void __flush_dcache_icache(void *p) { - unsigned long addr = (unsigned long)p; + unsigned long addr = (unsigned long)p & PAGE_MASK; clean_dcache_range(addr, addr + PAGE_SIZE); From 6c96020882b17fb6f4fbf7f8cef8c606460fc14d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:32 +0000 Subject: [PATCH 214/302] powerpc/mem: Inline flush_dcache_page() flush_dcache_page() is only a few lines, it is worth inlining. ia64, csky, mips, openrisc and riscv have a similar flush_dcache_page() and inline it. On pmac32_defconfig, we get a small size reduction. On ppc64_defconfig, we get a very small size increase. In both case that's in the noise (less than 0.1%). text data bss dec hex filename 18991155 5934744 1497624 26423523 19330e3 vmlinux64.before 18994829 5936732 1497624 26429185 1934701 vmlinux64.after 9150963 2467502 184548 11803013 b41985 vmlinux32.before 9149689 2467302 184548 11801539 b413c3 vmlinux32.after Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/21c417488b70b7629dae316539fb7bb8bdef4fdd.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cacheflush.h | 14 +++++++++++++- arch/powerpc/mm/cacheflush.c | 15 --------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 9110489ea411fe..7564dd4fd12b7e 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -30,7 +30,19 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) #endif /* CONFIG_PPC_BOOK3S_64 */ #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -extern void flush_dcache_page(struct page *page); +/* + * This is called when a page has been modified by the kernel. + * It just marks the page as not i-cache clean. We do the i-cache + * flush later when the page is given to a user process, if necessary. 
+ */ +static inline void flush_dcache_page(struct page *page) +{ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* avoid an atomic op if possible */ + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); +} void flush_icache_range(unsigned long start, unsigned long stop); #define flush_icache_range flush_icache_range diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index abeef69ed4e407..d9eafa077c0941 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -121,21 +121,6 @@ static void flush_dcache_icache_phys(unsigned long physaddr) } #endif -/* - * This is called when a page has been modified by the kernel. - * It just marks the page as not i-cache clean. We do the i-cache - * flush later when the page is given to a user process, if necessary. - */ -void flush_dcache_page(struct page *page) -{ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - return; - /* avoid an atomic op if possible */ - if (test_bit(PG_dcache_clean, &page->flags)) - clear_bit(PG_dcache_clean, &page->flags); -} -EXPORT_SYMBOL(flush_dcache_page); - /** * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. * Note: this is necessary because the instruction cache does *not* From 7e9ab144c128df7660a2f33c9c6d1422fe798060 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:33 +0000 Subject: [PATCH 215/302] powerpc/mem: Use kmap_local_page() in flushing functions Flushing functions don't rely on preemption being disabled, so use kmap_local_page() instead of kmap_atomic(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b6a880ea0ec7886b51edbb4979c188be549231c0.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/cacheflush.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index d9eafa077c0941..63363787e00076 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -152,16 +152,16 @@ static void flush_dcache_icache_hugepage(struct page *page) { int i; int nr = compound_nr(page); - void *start; if (!PageHighMem(page)) { for (i = 0; i < nr; i++) __flush_dcache_icache(lowmem_page_address(page + i)); } else { for (i = 0; i < nr; i++) { - start = kmap_atomic(page+i); + void *start = kmap_local_page(page + i); + __flush_dcache_icache(start); - kunmap_atomic(start); + kunmap_local(start); } } } @@ -177,9 +177,10 @@ void flush_dcache_icache_page(struct page *page) if (!PageHighMem(page)) { __flush_dcache_icache(lowmem_page_address(page)); } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { - void *start = kmap_atomic(page); + void *start = kmap_local_page(page); + __flush_dcache_icache(start); - kunmap_atomic(start); + kunmap_local(start); } else { flush_dcache_icache_phys(page_to_phys(page)); } @@ -225,9 +226,9 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, unsigned long addr, int len) { - unsigned long maddr; + void *maddr; - maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); - flush_icache_range(maddr, maddr + len); - kunmap(page); + maddr = kmap_local_page(page) + (addr & ~PAGE_MASK); + flush_icache_range((unsigned long)maddr, (unsigned long)maddr + len); + kunmap_local(maddr); } From 59fd366b9bef2d048af763e27cd1622ee5a1dfd4 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Thu, 8 Apr 
2021 14:20:12 +0800 Subject: [PATCH 216/302] powerpc/fadump: make symbol 'rtas_fadump_set_regval' static Fix sparse warnings: arch/powerpc/platforms/pseries/rtas-fadump.c:250:6: warning: symbol 'rtas_fadump_set_regval' was not declared. Should it be static? Signed-off-by: Pu Lehui Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210408062012.85973-1-pulehui@huawei.com --- arch/powerpc/platforms/pseries/rtas-fadump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c index 81343908ed3346..f8f73b47b10794 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.c +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -247,7 +247,7 @@ static inline int rtas_fadump_gpr_index(u64 id) return i; } -void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) +static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) { int i; From 2e2a441d2c0bb639b6fdbb64b15ee0a43599bcec Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 8 Apr 2021 13:15:03 +0530 Subject: [PATCH 217/302] powerpc/perf: Infrastructure to support checking of attr.config* Introduce code to support the checking of attr.config* for values which are reserved for a given platform. Performance Monitoring Unit (PMU) configuration registers have fields that are reserved and some specific values for bit fields are reserved. For ex., MMCRA[61:62] is Random Sampling Mode (SM) and value of 0b11 for this field is reserved. Writing non-zero or invalid values in these fields will have unknown behaviours. Patch adds a generic call-back function "check_attr_config" in "struct power_pmu", to be called in event_init to check for attr.config* values for a given platform. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210408074504.248211-1-maddy@linux.ibm.com --- arch/powerpc/include/asm/perf_event_server.h | 6 ++++++ arch/powerpc/perf/core-book3s.c | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 00e7e671bb4ba9..dde97d7d92532e 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -67,6 +67,12 @@ struct power_pmu { * the pmu supports extended perf regs capability */ int capabilities; + /* + * Function to check event code for values which are + * reserved. Function takes struct perf_event as input, + * since event code could be spread in attr.config* + */ + int (*check_attr_config)(struct perf_event *ev); }; /* diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 766f064f00fbf2..b17358e8dc12c1 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1963,6 +1963,17 @@ static int power_pmu_event_init(struct perf_event *event) return -ENOENT; } + /* + * PMU config registers have fields that are + * reserved and some specific values for bit fields are reserved. + * For ex., MMCRA[61:62] is Randome Sampling Mode (SM) + * and value of 0b11 to this field is reserved. + * Check for invalid values in attr.config. 
+ */ + if (ppmu->check_attr_config && + ppmu->check_attr_config(event)) + return -EINVAL; + event->hw.config_base = ev; event->hw.idx = 0; From 5a5a893c4ad897b8a36f846602895515b7407a71 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:41:55 +1000 Subject: [PATCH 218/302] powerpc/syscall: switch user_exit_irqoff and trace_hardirqs_off order user_exit_irqoff() -> __context_tracking_exit -> vtime_user_exit warns in __seqprop_assert due to lockdep thinking preemption is enabled because trace_hardirqs_off() has not yet been called. Switch the order of these two calls, which matches their ordering in interrupt_enter_prepare. Fixes: 5f0b6ac3905f ("powerpc/64/syscall: Reconcile interrupts") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316104206.407354-2-npiggin@gmail.com --- arch/powerpc/kernel/interrupt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index c4dd4b8f9cfa5a..fbabb49888d322 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -43,11 +43,11 @@ notrace long system_call_exception(long r3, long r4, long r5, if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + trace_hardirqs_off(); /* finish reconciling */ + CT_WARN_ON(ct_state() == CONTEXT_KERNEL); user_exit_irqoff(); - trace_hardirqs_off(); /* finish reconciling */ - if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); From 4228b2c3d20e9f80b847f809c38e6cf82864fa50 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:41:56 +1000 Subject: [PATCH 219/302] powerpc/64e/interrupt: always save nvgprs on interrupt In order to use the C interrupt return, nvgprs must always be saved. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316104206.407354-3-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 9 +-------- arch/powerpc/kernel/entry_64.S | 13 ------------- arch/powerpc/kernel/exceptions-64e.S | 27 +++------------------------ 3 files changed, 4 insertions(+), 45 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 95600f3a6523a5..c93511bf6b3bb3 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -186,18 +186,11 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) ((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1) #ifdef __powerpc64__ -#ifdef CONFIG_PPC_BOOK3S #define TRAP_FLAGS_MASK 0x10 #define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) #define FULL_REGS(regs) true #define SET_FULL_REGS(regs) do { } while (0) -#else -#define TRAP_FLAGS_MASK 0x11 -#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) -#define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#define SET_FULL_REGS(regs) ((regs)->trap &= ~1) -#endif -#define CHECK_FULL_REGS(regs) BUG_ON(!FULL_REGS(regs)) +#define CHECK_FULL_REGS(regs) do { } while (0) #define NV_REG_POISON 0xdeadbeefdeadbeefUL #else /* diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6c4d9e276c4d5b..853534b2ae2ebb 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -417,19 +417,6 @@ _GLOBAL(ret_from_kernel_thread) li r3,0 b .Lsyscall_exit -#ifdef CONFIG_PPC_BOOK3E -/* Save non-volatile GPRs, if not already saved. 
*/ -_GLOBAL(save_nvgprs) - ld r11,_TRAP(r1) - andi. r0,r11,1 - beqlr- - SAVE_NVGPRS(r1) - clrrdi r0,r11,1 - std r0,_TRAP(r1) - blr -_ASM_NOKPROBE_SYMBOL(save_nvgprs); -#endif - #ifdef CONFIG_PPC_BOOK3S_64 #define FLUSH_COUNT_CACHE \ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index e8eb9992a27038..a7d9ce9f7fdbf2 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -417,14 +417,15 @@ exc_##n##_common: \ std r6,_LINK(r1); \ std r7,_CTR(r1); \ std r8,_XER(r1); \ - li r3,(n)+1; /* indicate partial regs in trap */ \ + li r3,(n); /* regs.trap vector */ \ std r9,0(r1); /* store stack frame back link */ \ std r10,_CCR(r1); /* store orig CR in stackframe */ \ std r9,GPR1(r1); /* store stack frame back link */ \ std r11,SOFTE(r1); /* and save it to stackframe */ \ std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ std r3,_TRAP(r1); /* set trap number */ \ - std r0,RESULT(r1); /* clear regs->result */ + std r0,RESULT(r1); /* clear regs->result */ \ + SAVE_NVGPRS(r1); #define EXCEPTION_COMMON(n) \ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN) @@ -561,7 +562,6 @@ __end_interrupts: CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_CRIT(0x100) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD @@ -573,7 +573,6 @@ __end_interrupts: MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_MC(0x000) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD @@ -623,7 +622,6 @@ __end_interrupts: std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) - bl save_nvgprs bl program_check_exception b ret_from_except @@ -639,7 +637,6 @@ __end_interrupts: bl load_up_fpu b fast_exception_return 1: INTS_DISABLE - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_fp_unavailable_exception b ret_from_except @@ -661,7 +658,6 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif INTS_DISABLE - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception b ret_from_except @@ -673,7 +669,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x220) INTS_DISABLE - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION @@ -698,7 +693,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_CRIT(0x9f0) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD @@ -723,7 +717,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0xf20) INTS_DISABLE - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception b ret_from_except @@ -792,7 +785,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) - bl save_nvgprs bl DebugException b ret_from_except @@ -864,7 +856,6 @@ kernel_dbg_exc: addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) - bl save_nvgprs bl DebugException b ret_from_except @@ -887,7 +878,6 @@ kernel_dbg_exc: CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_CRIT(0x2a0) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD @@ -903,7 +893,6 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) 
EXCEPTION_COMMON(0x2c0) addi r3,r1,STACK_FRAME_OVERHEAD - bl save_nvgprs INTS_RESTORE_HARD bl unknown_exception b ret_from_except @@ -913,7 +902,6 @@ kernel_dbg_exc: CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_CRIT(0x2e0) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD @@ -926,7 +914,6 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x310) addi r3,r1,STACK_FRAME_OVERHEAD - bl save_nvgprs INTS_RESTORE_HARD bl unknown_exception b ret_from_except @@ -937,7 +924,6 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x320) addi r3,r1,STACK_FRAME_OVERHEAD - bl save_nvgprs INTS_RESTORE_HARD bl unknown_exception b ret_from_except @@ -948,7 +934,6 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x340) addi r3,r1,STACK_FRAME_OVERHEAD - bl save_nvgprs INTS_RESTORE_HARD bl unknown_exception b ret_from_except @@ -1014,7 +999,6 @@ storage_fault_common: cmpdi r3,0 bne- 1f b ret_from_except_lite -1: bl save_nvgprs mr r4,r3 addi r3,r1,STACK_FRAME_OVERHEAD bl __bad_page_fault @@ -1030,16 +1014,12 @@ alignment_more: addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) - bl save_nvgprs INTS_RESTORE_HARD bl alignment_exception b ret_from_except .align 7 _GLOBAL(ret_from_except) - ld r11,_TRAP(r1) - andi. r0,r11,1 - bne ret_from_except_lite REST_NVGPRS(r1) _GLOBAL(ret_from_except_lite) @@ -1080,7 +1060,6 @@ _GLOBAL(ret_from_except_lite) SCHEDULE_USER b ret_from_except_lite 2: - bl save_nvgprs /* * Use a non volatile GPR to save and restore our thread_info flags * across the call to restore_interrupts. From dc6231821a148d0392292924fdae5b34679af6b2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:41:57 +1000 Subject: [PATCH 220/302] powerpc/interrupt: update common interrupt code for This makes adjustments to 64-bit asm and common C interrupt return code to be usable by the 64e subarchitecture. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316104206.407354-4-npiggin@gmail.com --- arch/powerpc/kernel/entry_64.S | 9 +++++++-- arch/powerpc/kernel/interrupt.c | 35 ++++++++++++++++++++------------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 853534b2ae2ebb..555b3d0a3f38e6 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -632,7 +632,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) addi r1,r1,SWITCH_FRAME_SIZE blr -#ifdef CONFIG_PPC_BOOK3S /* * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not * touched, no exit work created, then this can be used. @@ -644,6 +643,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) kuap_check_amr r3, r4 ld r5,_MSR(r1) andi. r0,r5,MSR_PR +#ifdef CONFIG_PPC_BOOK3S bne .Lfast_user_interrupt_return_amr kuap_kernel_restore r3, r4 andi. r0,r5,MSR_RI @@ -652,6 +652,10 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) addi r3,r1,STACK_FRAME_OVERHEAD bl unrecoverable_exception b . 
/* should not get here */ +#else + bne .Lfast_user_interrupt_return + b .Lfast_kernel_interrupt_return +#endif .balign IFETCH_ALIGN_BYTES .globl interrupt_return @@ -665,8 +669,10 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) cmpdi r3,0 bne- .Lrestore_nvgprs +#ifdef CONFIG_PPC_BOOK3S .Lfast_user_interrupt_return_amr: kuap_user_restore r3, r4 +#endif .Lfast_user_interrupt_return: ld r11,_NIP(r1) ld r12,_MSR(r1) @@ -775,7 +781,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) RFI_TO_KERNEL b . /* prevent speculative execution */ -#endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_RTAS /* diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index fbabb49888d322..381a618b5b5b06 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -235,6 +235,10 @@ static notrace void booke_load_dbcr0(void) #endif } +/* temporary hack for context tracking, removed in later patch */ +#include +asmlinkage __visible void __sched schedule_user(void); + /* * This should be called after a syscall returns, with r3 the return value * from the syscall. If this function returns non-zero, the system call @@ -292,7 +296,11 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); if (ti_flags & _TIF_NEED_RESCHED) { +#ifdef CONFIG_PPC_BOOK3E_64 + schedule_user(); +#else schedule(); +#endif } else { /* * SIGPENDING must restore signal handler function @@ -349,18 +357,13 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, account_cpu_user_exit(); -#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not using this */ - /* - * We do this at the end so that we do context switch with KERNEL AMR - */ + /* Restore user access locks last */ kuap_user_restore(regs); -#endif kuep_unlock(); return ret; } -#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) { unsigned long ti_flags; @@ -372,7 +375,9 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(!FULL_REGS(regs)); BUG_ON(arch_irq_disabled_regs(regs)); +#ifdef CONFIG_PPC_BOOK3S_64 CT_WARN_ON(ct_state() == CONTEXT_USER); +#endif /* * We don't need to restore AMR on the way back to userspace for KUAP. @@ -387,7 +392,11 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); /* returning to user: may enable */ if (ti_flags & _TIF_NEED_RESCHED) { +#ifdef CONFIG_PPC_BOOK3E_64 + schedule_user(); +#else schedule(); +#endif } else { if (ti_flags & _TIF_SIGPENDING) ret |= _TIF_RESTOREALL; @@ -432,10 +441,9 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned account_cpu_user_exit(); - /* - * We do this at the end so that we do context switch with KERNEL AMR - */ + /* Restore user access locks last */ kuap_user_restore(regs); + return ret; } @@ -456,7 +464,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. 
*/ - if (TRAP(regs) != 0x700) + if (IS_ENABLED(CONFIG_BOOKS) && TRAP(regs) != 0x700) CT_WARN_ON(ct_state() == CONTEXT_USER); kuap = kuap_get_and_assert_locked(); @@ -497,12 +505,11 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign #endif /* - * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr, - * which would cause Read-After-Write stalls. Hence, we take the AMR - * value from the check above. + * 64s does not want to mfspr(SPRN_AMR) here, because this comes after + * mtmsr, which would cause Read-After-Write stalls. Hence, take the + * AMR value from the check above. */ kuap_kernel_restore(regs, kuap); return ret; } -#endif From 0c2472de23aea5ce9139a3e887191925759d1259 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:41:58 +1000 Subject: [PATCH 221/302] powerpc/64e/interrupt: use new interrupt return Update the new C and asm interrupt return code to account for 64e specifics, switch over to use it. The now-unused old ret_from_except code, that was moved to 64e after the 64s conversion, is removed. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316104206.407354-5-npiggin@gmail.com --- arch/powerpc/include/asm/asm-prototypes.h | 2 - arch/powerpc/include/asm/ppc_asm.h | 20 -- arch/powerpc/kernel/asm-offsets.c | 10 - arch/powerpc/kernel/exceptions-64e.S | 321 ++-------------------- arch/powerpc/kernel/irq.c | 76 ----- 5 files changed, 25 insertions(+), 404 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 939f3c94c8f392..1c7b75834e045c 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -77,8 +77,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low); long sys_switch_endian(void); -notrace unsigned int __check_irq_replay(void); -void notrace restore_interrupts(void); /* prom_init (OpenFirmware) */ unsigned long __init prom_init(unsigned long r3, unsigned long r4, diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 8998122fc7e22d..d6739d700f0a3b 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -15,26 +15,6 @@ #define SZL (BITS_PER_LONG/8) -/* - * Stuff for accurate CPU time accounting. - * These macros handle transitions between user and system state - * in exception entry and exit and accumulate time to the - * user_time and system_time fields in the paca. - */ - -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) -#else -#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) \ - MFTB(ra); /* get timebase */ \ - PPC_LL rb, ACCOUNT_STARTTIME(ptr); \ - PPC_STL ra, ACCOUNT_STARTTIME_USER(ptr); \ - subf rb,rb,ra; /* subtract start value */ \ - PPC_LL ra, ACCOUNT_SYSTEM_TIME(ptr); \ - add ra,ra,rb; /* add on to system time */ \ - PPC_STL ra, ACCOUNT_SYSTEM_TIME(ptr) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ - /* * Macros for storing registers into and loading registers from * exception frames. 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d2f1b94e944d98..28af4efb458701 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -282,21 +282,11 @@ int main(void) OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default); - OFFSET(ACCOUNT_STARTTIME, paca_struct, accounting.starttime); - OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user); - OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime); - OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime); #ifdef CONFIG_PPC_BOOK3E OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); #endif OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso); #else /* CONFIG_PPC64 */ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - OFFSET(ACCOUNT_STARTTIME, thread_info, accounting.starttime); - OFFSET(ACCOUNT_STARTTIME_USER, thread_info, accounting.starttime_user); - OFFSET(ACCOUNT_USER_TIME, thread_info, accounting.utime); - OFFSET(ACCOUNT_SYSTEM_TIME, thread_info, accounting.stime); -#endif #endif /* CONFIG_PPC64 */ /* RTAS */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index a7d9ce9f7fdbf2..7a2e7be330926b 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -139,7 +139,8 @@ ret_from_level_except: ld r3,_MSR(r1) andi. r3,r3,MSR_PR beq 1f - b ret_from_except + REST_NVGPRS(r1) + b interrupt_return 1: LOAD_REG_ADDR(r11,extlb_level_exc) @@ -208,7 +209,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) /* * Restore PACAIRQHAPPENED rather than setting it based on * the return MSR[EE], since we could have interrupted - * __check_irq_replay() or other inconsistent transitory + * interrupt replay or other inconsistent transitory * states that must remain that way. */ SPECIAL_EXC_LOAD(r10,IRQHAPPENED) @@ -511,7 +512,7 @@ exc_##n##_bad_stack: \ CHECK_NAPPING(); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ - b ret_from_except_lite; + b interrupt_return /* This value is used to mark exception frames on the stack. */ .section ".toc","aw" @@ -623,7 +624,8 @@ __end_interrupts: addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) bl program_check_exception - b ret_from_except + REST_NVGPRS(r1) + b interrupt_return /* Floating Point Unavailable Interrupt */ START_EXCEPTION(fp_unavailable); @@ -635,11 +637,11 @@ __end_interrupts: andi. r0,r12,MSR_PR; beq- 1f bl load_up_fpu - b fast_exception_return + b fast_interrupt_return 1: INTS_DISABLE addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_fp_unavailable_exception - b ret_from_except + b interrupt_return /* Altivec Unavailable Interrupt */ START_EXCEPTION(altivec_unavailable); @@ -653,14 +655,14 @@ BEGIN_FTR_SECTION andi. 
r0,r12,MSR_PR; beq- 1f bl load_up_altivec - b fast_exception_return + b fast_interrupt_return 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif INTS_DISABLE addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception - b ret_from_except + b interrupt_return /* AltiVec Assist */ START_EXCEPTION(altivec_assist); @@ -674,10 +676,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) BEGIN_FTR_SECTION bl altivec_assist_exception END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + REST_NVGPRS(r1) #else bl unknown_exception #endif - b ret_from_except + b interrupt_return /* Decrementer Interrupt */ @@ -719,7 +722,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) INTS_DISABLE addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception - b ret_from_except + b interrupt_return /* Debug exception as a critical interrupt*/ START_EXCEPTION(debug_crit); @@ -786,7 +789,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) bl DebugException - b ret_from_except + REST_NVGPRS(r1) + b interrupt_return kernel_dbg_exc: b . /* NYI */ @@ -857,7 +861,8 @@ kernel_dbg_exc: ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) bl DebugException - b ret_from_except + REST_NVGPRS(r1) + b interrupt_return START_EXCEPTION(perfmon); NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR, @@ -867,7 +872,7 @@ kernel_dbg_exc: CHECK_NAPPING() addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception - b ret_from_except_lite + b interrupt_return /* Doorbell interrupt */ MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL, @@ -895,7 +900,7 @@ kernel_dbg_exc: addi r3,r1,STACK_FRAME_OVERHEAD INTS_RESTORE_HARD bl unknown_exception - b ret_from_except + b interrupt_return /* Guest Doorbell critical Interrupt */ START_EXCEPTION(guest_doorbell_crit); @@ -916,7 +921,7 @@ kernel_dbg_exc: addi r3,r1,STACK_FRAME_OVERHEAD INTS_RESTORE_HARD bl unknown_exception - b ret_from_except + b interrupt_return /* Embedded Hypervisor priviledged */ START_EXCEPTION(ehpriv); @@ -926,7 +931,7 @@ kernel_dbg_exc: addi r3,r1,STACK_FRAME_OVERHEAD INTS_RESTORE_HARD bl unknown_exception - b ret_from_except + b interrupt_return /* LRAT Error interrupt */ START_EXCEPTION(lrat_error); @@ -936,7 +941,7 @@ kernel_dbg_exc: addi r3,r1,STACK_FRAME_OVERHEAD INTS_RESTORE_HARD bl unknown_exception - b ret_from_except + b interrupt_return /* * An interrupt came in while soft-disabled; We mark paca->irq_happened @@ -998,11 +1003,11 @@ storage_fault_common: bl do_page_fault cmpdi r3,0 bne- 1f - b ret_from_except_lite + b interrupt_return mr r4,r3 addi r3,r1,STACK_FRAME_OVERHEAD bl __bad_page_fault - b ret_from_except + b interrupt_return /* * Alignment exception doesn't fit entirely in the 0x100 bytes so it @@ -1016,284 +1021,8 @@ alignment_more: ld r15,PACA_EXGEN+EX_R15(r13) INTS_RESTORE_HARD bl alignment_exception - b ret_from_except - - .align 7 -_GLOBAL(ret_from_except) REST_NVGPRS(r1) - -_GLOBAL(ret_from_except_lite) - /* - * Disable interrupts so that current_thread_info()->flags - * can't change between when we test it and when we return - * from the interrupt. - */ - wrteei 0 - - ld r9, PACA_THREAD_INFO(r13) - ld r3,_MSR(r1) - ld r10,PACACURRENT(r13) - ld r4,TI_FLAGS(r9) - andi. r3,r3,MSR_PR - beq resume_kernel - lwz r3,(THREAD+THREAD_DBCR0)(r10) - - /* Check current_thread_info()->flags */ - andi. r0,r4,_TIF_USER_WORK_MASK - bne 1f - /* - * Check to see if the dbcr0 register is set up to debug. - * Use the internal debug mode bit to do this. - */ - andis. 
r0,r3,DBCR0_IDM@h - beq restore - mfmsr r0 - rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */ - mtmsr r0 - mtspr SPRN_DBCR0,r3 - li r10, -1 - mtspr SPRN_DBSR,r10 - b restore -1: andi. r0,r4,_TIF_NEED_RESCHED - beq 2f - bl restore_interrupts - SCHEDULE_USER - b ret_from_except_lite -2: - /* - * Use a non volatile GPR to save and restore our thread_info flags - * across the call to restore_interrupts. - */ - mr r30,r4 - bl restore_interrupts - mr r4,r30 - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_notify_resume - b ret_from_except - -resume_kernel: - /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ - andis. r8,r4,_TIF_EMULATE_STACK_STORE@h - beq+ 1f - - addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ - - ld r3,GPR1(r1) - subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ - mr r4,r1 /* src: current exception frame */ - mr r1,r3 /* Reroute the trampoline frame to r1 */ - - /* Copy from the original to the trampoline. */ - li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */ - li r6,0 /* start offset: 0 */ - mtctr r5 -2: ldx r0,r6,r4 - stdx r0,r6,r3 - addi r6,r6,8 - bdnz 2b - - /* Do real store operation to complete stdu */ - ld r5,GPR1(r1) - std r8,0(r5) - - /* Clear _TIF_EMULATE_STACK_STORE flag */ - lis r11,_TIF_EMULATE_STACK_STORE@h - addi r5,r9,TI_FLAGS -0: ldarx r4,0,r5 - andc r4,r4,r11 - stdcx. r4,0,r5 - bne- 0b -1: - -#ifdef CONFIG_PREEMPT - /* Check if we need to preempt */ - andi. r0,r4,_TIF_NEED_RESCHED - beq+ restore - /* Check that preempt_count() == 0 and interrupts are enabled */ - lwz r8,TI_PREEMPT(r9) - cmpwi cr0,r8,0 - bne restore - ld r0,SOFTE(r1) - andi. r0,r0,IRQS_DISABLED - bne restore - - /* - * Here we are preempting the current task. We want to make - * sure we are soft-disabled first and reconcile irq state. - */ - RECONCILE_IRQ_STATE(r3,r4) - bl preempt_schedule_irq - - /* - * arch_local_irq_restore() from preempt_schedule_irq above may - * enable hard interrupt but we really should disable interrupts - * when we return from the interrupt, and so that we don't get - * interrupted after loading SRR0/1. - */ - wrteei 0 -#endif /* CONFIG_PREEMPT */ - -restore: - /* - * This is the main kernel exit path. First we check if we - * are about to re-enable interrupts - */ - ld r5,SOFTE(r1) - lbz r6,PACAIRQSOFTMASK(r13) - andi. r5,r5,IRQS_DISABLED - bne .Lrestore_irq_off - - /* We are enabling, were we already enabled ? Yes, just return */ - andi. r6,r6,IRQS_DISABLED - beq cr0,fast_exception_return - - /* - * We are about to soft-enable interrupts (we are hard disabled - * at this point). We check if there's anything that needs to - * be replayed first. - */ - lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 - bne- .Lrestore_check_irq_replay - - /* - * Get here when nothing happened while soft-disabled, just - * soft-enable and move-on. We will hard-enable as a side - * effect of rfi - */ -.Lrestore_no_replay: - TRACE_ENABLE_INTS - li r0,IRQS_ENABLED - stb r0,PACAIRQSOFTMASK(r13); - -/* This is the return from load_up_fpu fast path which could do with - * less GPR restores in fact, but for now we have a single return path - */ -fast_exception_return: - wrteei 0 -1: mr r0,r13 - ld r10,_MSR(r1) - REST_4GPRS(2, r1) - andi. r6,r10,MSR_PR - REST_2GPRS(6, r1) - beq 1f - ACCOUNT_CPU_USER_EXIT(r13, r10, r11) - ld r0,GPR13(r1) - -1: stdcx. 
r0,0,r1 /* to clear the reservation */ - - ld r8,_CCR(r1) - ld r9,_LINK(r1) - ld r10,_CTR(r1) - ld r11,_XER(r1) - mtcr r8 - mtlr r9 - mtctr r10 - mtxer r11 - REST_2GPRS(8, r1) - ld r10,GPR10(r1) - ld r11,GPR11(r1) - ld r12,GPR12(r1) - mtspr SPRN_SPRG_GEN_SCRATCH,r0 - - std r10,PACA_EXGEN+EX_R10(r13); - std r11,PACA_EXGEN+EX_R11(r13); - ld r10,_NIP(r1) - ld r11,_MSR(r1) - ld r0,GPR0(r1) - ld r1,GPR1(r1) - mtspr SPRN_SRR0,r10 - mtspr SPRN_SRR1,r11 - ld r10,PACA_EXGEN+EX_R10(r13) - ld r11,PACA_EXGEN+EX_R11(r13) - mfspr r13,SPRN_SPRG_GEN_SCRATCH - rfi - - /* - * We are returning to a context with interrupts soft disabled. - * - * However, we may also about to hard enable, so we need to - * make sure that in this case, we also clear PACA_IRQ_HARD_DIS - * or that bit can get out of sync and bad things will happen - */ -.Lrestore_irq_off: - ld r3,_MSR(r1) - lbz r7,PACAIRQHAPPENED(r13) - andi. r0,r3,MSR_EE - beq 1f - rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS - stb r7,PACAIRQHAPPENED(r13) -1: -#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) - /* The interrupt should not have soft enabled. */ - lbz r7,PACAIRQSOFTMASK(r13) -1: tdeqi r7,IRQS_ENABLED - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING -#endif - b fast_exception_return - - /* - * Something did happen, check if a re-emit is needed - * (this also clears paca->irq_happened) - */ -.Lrestore_check_irq_replay: - /* XXX: We could implement a fast path here where we check - * for irq_happened being just 0x01, in which case we can - * clear it and return. That means that we would potentially - * miss a decrementer having wrapped all the way around. - * - * Still, this might be useful for things like hash_page - */ - bl __check_irq_replay - cmpwi cr0,r3,0 - beq .Lrestore_no_replay - - /* - * We need to re-emit an interrupt. We do so by re-using our - * existing exception frame. We first change the trap value, - * but we need to ensure we preserve the low nibble of it - */ - ld r4,_TRAP(r1) - clrldi r4,r4,60 - or r4,r4,r3 - std r4,_TRAP(r1) - - /* - * PACA_IRQ_HARD_DIS won't always be set here, so set it now - * to reconcile the IRQ state. Tracing is already accounted for. - */ - lbz r4,PACAIRQHAPPENED(r13) - ori r4,r4,PACA_IRQ_HARD_DIS - stb r4,PACAIRQHAPPENED(r13) - - /* - * Then find the right handler and call it. Interrupts are - * still soft-disabled and we keep them that way. - */ - cmpwi cr0,r3,0x500 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl do_IRQ - b ret_from_except -1: cmpwi cr0,r3,0x900 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl timer_interrupt - b ret_from_except -#ifdef CONFIG_PPC_DOORBELL -1: - cmpwi cr0,r3,0x280 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl doorbell_exception -#endif /* CONFIG_PPC_DOORBELL */ -1: b ret_from_except /* What else to do here ? */ - -_ASM_NOKPROBE_SYMBOL(ret_from_except); -_ASM_NOKPROBE_SYMBOL(ret_from_except_lite); -_ASM_NOKPROBE_SYMBOL(resume_kernel); -_ASM_NOKPROBE_SYMBOL(restore); -_ASM_NOKPROBE_SYMBOL(fast_exception_return); + b interrupt_return /* * Trampolines used when spotting a bad kernel stack pointer in diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 260effc0a435d8..893d3f8d6f4729 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -104,82 +104,6 @@ static inline notrace unsigned long get_irq_happened(void) return happened; } -#ifdef CONFIG_PPC_BOOK3E - -/* This is called whenever we are re-enabling interrupts - * and returns either 0 (nothing to do) or 500/900/280 if - * there's an EE, DEC or DBELL to generate. 
- * - * This is called in two contexts: From arch_local_irq_restore() - * before soft-enabling interrupts, and from the exception exit - * path when returning from an interrupt from a soft-disabled to - * a soft enabled context. In both case we have interrupts hard - * disabled. - * - * We take care of only clearing the bits we handled in the - * PACA irq_happened field since we can only re-emit one at a - * time and we don't want to "lose" one. - */ -notrace unsigned int __check_irq_replay(void) -{ - /* - * We use local_paca rather than get_paca() to avoid all - * the debug_smp_processor_id() business in this low level - * function - */ - unsigned char happened = local_paca->irq_happened; - - /* - * We are responding to the next interrupt, so interrupt-off - * latencies should be reset here. - */ - trace_hardirqs_on(); - trace_hardirqs_off(); - - if (happened & PACA_IRQ_DEC) { - local_paca->irq_happened &= ~PACA_IRQ_DEC; - return 0x900; - } - - if (happened & PACA_IRQ_EE) { - local_paca->irq_happened &= ~PACA_IRQ_EE; - return 0x500; - } - - if (happened & PACA_IRQ_DBELL) { - local_paca->irq_happened &= ~PACA_IRQ_DBELL; - return 0x280; - } - - if (happened & PACA_IRQ_HARD_DIS) - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - - /* There should be nothing left ! */ - BUG_ON(local_paca->irq_happened != 0); - - return 0; -} - -/* - * This is specifically called by assembly code to re-enable interrupts - * if they are currently disabled. This is typically called before - * schedule() or do_signal() when returning to userspace. We do it - * in C to avoid the burden of dealing with lockdep etc... - * - * NOTE: This is called with interrupts hard disabled but not marked - * as such in paca->irq_happened, so we need to resync this. - */ -void notrace restore_interrupts(void) -{ - if (irqs_disabled()) { - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - local_irq_enable(); - } else - __hard_irq_enable(); -} - -#endif /* CONFIG_PPC_BOOK3E */ - void replay_soft_interrupts(void) { struct pt_regs regs; From 3db8aa10de9a478b3086db7894e0266def3d77af Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:41:59 +1000 Subject: [PATCH 222/302] powerpc/64e/interrupt: NMI save irq soft-mask state in C 64e non-maskable interrupts save the state of the irq soft-mask in asm. This can be done in C in interrupt wrappers as 64s does. I haven't been able to test this with qemu because it doesn't seem to cause FSL bookE WDT interrupts. This makes WatchdogException an NMI interrupt, which affects 32-bit as well (okay, or create a new handler?) 
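As a rough sketch of the idea (simplified, not the literal hunks below; the struct and helper names here are illustrative, and the soft-mask write is shown as a plain assignment rather than through the usual accessor), the state handling that moves from the asm special register save/restore into the C wrappers is:

  struct nmi_state_sketch {            /* illustrative only */
          u8 irq_soft_mask;
          u8 irq_happened;
  };

  static inline void nmi_enter_sketch(struct nmi_state_sketch *state)
  {
          /* snapshot whatever irq state the NMI interrupted */
          state->irq_soft_mask = local_paca->irq_soft_mask;
          state->irq_happened  = local_paca->irq_happened;

          /* run the handler as if soft-masked and hard-disabled */
          local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
          local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
  }

  static inline void nmi_exit_sketch(const struct nmi_state_sketch *state)
  {
          /* restore the saved state instead of deriving it from MSR[EE] */
          local_paca->irq_happened  = state->irq_happened;
          local_paca->irq_soft_mask = state->irq_soft_mask;
  }
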
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316104206.407354-6-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 32 +++++++++++++++++-------- arch/powerpc/kernel/exceptions-64e.S | 36 ++++------------------------ arch/powerpc/kernel/traps.c | 13 +++++++++- 3 files changed, 38 insertions(+), 43 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 05e7fc4ffb50d2..b17a55062bb80d 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -149,18 +149,32 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int struct interrupt_nmi_state { #ifdef CONFIG_PPC64 -#ifdef CONFIG_PPC_BOOK3S_64 u8 irq_soft_mask; u8 irq_happened; -#endif u8 ftrace_enabled; #endif }; +static inline bool nmi_disables_ftrace(struct pt_regs *regs) +{ + /* Allow DEC and PMI to be traced when they are soft-NMI */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { + if (TRAP(regs) == 0x900) + return false; + if (TRAP(regs) == 0xf00) + return false; + } + if (IS_ENABLED(CONFIG_PPC_BOOK3E)) { + if (TRAP(regs) == 0x260) + return false; + } + + return true; +} + static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state) { #ifdef CONFIG_PPC64 -#ifdef CONFIG_PPC_BOOK3S_64 state->irq_soft_mask = local_paca->irq_soft_mask; state->irq_happened = local_paca->irq_happened; @@ -173,9 +187,8 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte local_paca->irq_happened |= PACA_IRQ_HARD_DIS; /* Don't do any per-CPU operations until interrupt state is fixed */ -#endif - /* Allow DEC and PMI to be traced when they are soft-NMI */ - if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) { + + if (nmi_disables_ftrace(regs)) { state->ftrace_enabled = this_cpu_get_ftrace_enabled(); this_cpu_set_ftrace_enabled(0); } @@ -204,16 +217,14 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter */ #ifdef CONFIG_PPC64 - if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) + if (nmi_disables_ftrace(regs)) this_cpu_set_ftrace_enabled(state->ftrace_enabled); -#ifdef CONFIG_PPC_BOOK3S_64 /* Check we didn't change the pending interrupt mask. 
*/ WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened); local_paca->irq_happened = state->irq_happened; local_paca->irq_soft_mask = state->irq_soft_mask; #endif -#endif } /* @@ -426,6 +437,7 @@ DECLARE_INTERRUPT_HANDLER(SMIException); DECLARE_INTERRUPT_HANDLER(handle_hmi_exception); DECLARE_INTERRUPT_HANDLER(unknown_exception); DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception); +DECLARE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception); DECLARE_INTERRUPT_HANDLER(instruction_breakpoint_exception); DECLARE_INTERRUPT_HANDLER(RunModeException); DECLARE_INTERRUPT_HANDLER(single_step_exception); @@ -449,7 +461,7 @@ DECLARE_INTERRUPT_HANDLER(altivec_assist_exception); DECLARE_INTERRUPT_HANDLER(CacheLockingException); DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException); DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException); -DECLARE_INTERRUPT_HANDLER(WatchdogException); +DECLARE_INTERRUPT_HANDLER_NMI(WatchdogException); DECLARE_INTERRUPT_HANDLER(kernel_bad_stack); /* slb.c */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 7a2e7be330926b..18be576fc0b3e8 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -63,9 +63,6 @@ ld reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1) special_reg_save: - lbz r9,PACAIRQHAPPENED(r13) - RECONCILE_IRQ_STATE(r3,r4) - /* * We only need (or have stack space) to save this stuff if * we interrupted the kernel. @@ -119,15 +116,11 @@ BEGIN_FTR_SECTION mtspr SPRN_MAS5,r10 mtspr SPRN_MAS8,r10 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) - SPECIAL_EXC_STORE(r9,IRQHAPPENED) - mfspr r10,SPRN_DEAR SPECIAL_EXC_STORE(r10,DEAR) mfspr r10,SPRN_ESR SPECIAL_EXC_STORE(r10,ESR) - lbz r10,PACAIRQSOFTMASK(r13) - SPECIAL_EXC_STORE(r10,SOFTE) ld r10,_NIP(r1) SPECIAL_EXC_STORE(r10,CSRR0) ld r10,_MSR(r1) @@ -194,27 +187,6 @@ BEGIN_FTR_SECTION mtspr SPRN_MAS8,r10 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) - lbz r6,PACAIRQSOFTMASK(r13) - ld r5,SOFTE(r1) - - /* Interrupts had better not already be enabled... */ - tweqi r6,IRQS_ENABLED - - andi. r6,r5,IRQS_DISABLED - bne 1f - - TRACE_ENABLE_INTS - stb r5,PACAIRQSOFTMASK(r13) -1: - /* - * Restore PACAIRQHAPPENED rather than setting it based on - * the return MSR[EE], since we could have interrupted - * interrupt replay or other inconsistent transitory - * states that must remain that way. 
- */ - SPECIAL_EXC_LOAD(r10,IRQHAPPENED) - stb r10,PACAIRQHAPPENED(r13) - SPECIAL_EXC_LOAD(r10,DEAR) mtspr SPRN_DEAR,r10 SPECIAL_EXC_LOAD(r10,ESR) @@ -566,7 +538,7 @@ __end_interrupts: bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD - bl unknown_exception + bl unknown_nmi_exception b ret_from_crit_except /* Machine Check Interrupt */ @@ -702,7 +674,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #ifdef CONFIG_BOOKE_WDT bl WatchdogException #else - bl unknown_exception + bl unknown_nmi_exception #endif b ret_from_crit_except @@ -886,7 +858,7 @@ kernel_dbg_exc: bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD - bl unknown_exception + bl unknown_nmi_exception b ret_from_crit_except /* @@ -910,7 +882,7 @@ kernel_dbg_exc: bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD - bl unknown_exception + bl unknown_nmi_exception b ret_from_crit_except /* Hypervisor call */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index efba9987069171..fd965cbe07d86c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1078,6 +1078,16 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception) _exception(SIGTRAP, regs, TRAP_UNK, 0); } +DEFINE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception) +{ + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + + _exception(SIGTRAP, regs, TRAP_UNK, 0); + + return 0; +} + DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception) { if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, @@ -2181,10 +2191,11 @@ void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) return; } -DEFINE_INTERRUPT_HANDLER(WatchdogException) /* XXX NMI? async? */ +DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException) { printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); WatchdogHandler(regs); + return 0; } #endif From 097157e16cf8bf91b9cf6fbda05d234d3599c01f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:42:00 +1000 Subject: [PATCH 223/302] powerpc/64e/interrupt: reconcile irq soft-mask state in C Use existing 64s interrupt entry wrapper code to reconcile irqs in C. 
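Concretely, the reconciliation that the 64e INTS_DISABLE / RECONCILE_IRQ_STATE asm used to perform now happens in interrupt_enter_prepare(); pulled out as a standalone sketch (the helper name is illustrative, the body matches the hunk below):

  static inline void reconcile_irq_state_sketch(void)
  {
          /* mark interrupts soft-disabled; tell lockdep if they had been enabled */
          if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
                  trace_hardirqs_off();

          /*
           * remember that we are hard-disabled so that a later soft-enable
           * via arch_local_irq_restore() replays anything that arrived
           * while the handler ran
           */
          local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
  }
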
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316104206.407354-7-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 8 +++--- arch/powerpc/kernel/entry_64.S | 18 ++++++------- arch/powerpc/kernel/exceptions-64e.S | 39 +--------------------------- 3 files changed, 13 insertions(+), 52 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index b17a55062bb80d..104a77c00a315d 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -51,14 +51,14 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup kuap_save_and_lock(regs); } #endif - /* - * Book3E reconciles irq soft mask in asm - */ -#ifdef CONFIG_PPC_BOOK3S_64 + +#ifdef CONFIG_PPC64 if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED) trace_hardirqs_off(); local_paca->irq_happened |= PACA_IRQ_HARD_DIS; +#endif +#ifdef CONFIG_PPC_BOOK3S_64 if (user_mode(regs)) { CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 555b3d0a3f38e6..03727308d8cc45 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -117,13 +117,12 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* - * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which - * would clobber syscall parameters. Also we always enter with IRQs - * enabled and nothing pending. system_call_exception() will call - * trace_hardirqs_off(). - * - * scv enters with MSR[EE]=1, so don't set PACA_IRQ_HARD_DIS. The - * entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED. + * scv enters with MSR[EE]=1 and is immediately considered soft-masked. + * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED, + * and interrupts may be masked and pending already. + * system_call_exception() will call trace_hardirqs_off() which means + * interrupts could already have been blocked before trace_hardirqs_off, + * but this is the best we can do. */ /* Calling convention has r9 = orig r0, r10 = regs */ @@ -288,9 +287,8 @@ END_BTB_FLUSH_SECTION std r11,-16(r10) /* "regshere" marker */ /* - * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which - * would clobber syscall parameters. Also we always enter with IRQs - * enabled and nothing pending. system_call_exception() will call + * We always enter kernel from userspace with irq soft-mask enabled and + * nothing pending. system_call_exception() will call * trace_hardirqs_off(). */ li r11,IRQS_ALL_DISABLED diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 18be576fc0b3e8..3c222a97f0232a 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -409,28 +409,6 @@ exc_##n##_common: \ #define EXCEPTION_COMMON_DBG(n) \ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_DBG_SCRATCH, PACA_EXDBG) -/* - * This is meant for exceptions that don't immediately hard-enable. We - * set a bit in paca->irq_happened to ensure that a subsequent call to - * arch_local_irq_restore() will properly hard-enable and avoid the - * fast-path, and then reconcile irq state. - */ -#define INTS_DISABLE RECONCILE_IRQ_STATE(r3,r4) - -/* - * This is called by exceptions that don't use INTS_DISABLE (that did not - * touch irq indicators in the PACA). 
This will restore MSR:EE to it's - * previous value - * - * XXX In the long run, we may want to open-code it in order to separate the - * load from the wrtee, thus limiting the latency caused by the dependency - * but at this point, I'll favor code clarity until we have a near to final - * implementation - */ -#define INTS_RESTORE_HARD \ - ld r11,_MSR(r1); \ - wrtee r11; - /* XXX FIXME: Restore r14/r15 when necessary */ #define BAD_STACK_TRAMPOLINE(n) \ exc_##n##_bad_stack: \ @@ -479,7 +457,6 @@ exc_##n##_bad_stack: \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ EXCEPTION_COMMON(trapnum) \ - INTS_DISABLE; \ ack(r8); \ CHECK_NAPPING(); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -559,7 +536,6 @@ __end_interrupts: mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR EXCEPTION_COMMON(0x300) - INTS_DISABLE b storage_fault_common /* Instruction Storage Interrupt */ @@ -569,7 +545,6 @@ __end_interrupts: li r15,0 mr r14,r10 EXCEPTION_COMMON(0x400) - INTS_DISABLE b storage_fault_common /* External Input Interrupt */ @@ -591,7 +566,6 @@ __end_interrupts: PROLOG_ADDITION_1REG) mfspr r14,SPRN_ESR EXCEPTION_COMMON(0x700) - INTS_DISABLE std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) @@ -610,8 +584,7 @@ __end_interrupts: beq- 1f bl load_up_fpu b fast_interrupt_return -1: INTS_DISABLE - addi r3,r1,STACK_FRAME_OVERHEAD +1: addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_fp_unavailable_exception b interrupt_return @@ -631,7 +604,6 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - INTS_DISABLE addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception b interrupt_return @@ -642,7 +614,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) BOOKE_INTERRUPT_ALTIVEC_ASSIST, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x220) - INTS_DISABLE addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION @@ -691,7 +662,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0xf20) - INTS_DISABLE addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception b interrupt_return @@ -827,7 +797,6 @@ kernel_dbg_exc: */ mfspr r14,SPRN_DBSR EXCEPTION_COMMON_DBG(0xd08) - INTS_DISABLE std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXDBG+EX_R14(r13) @@ -840,7 +809,6 @@ kernel_dbg_exc: NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x260) - INTS_DISABLE CHECK_NAPPING() addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception @@ -870,7 +838,6 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x2c0) addi r3,r1,STACK_FRAME_OVERHEAD - INTS_RESTORE_HARD bl unknown_exception b interrupt_return @@ -891,7 +858,6 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x310) addi r3,r1,STACK_FRAME_OVERHEAD - INTS_RESTORE_HARD bl unknown_exception b interrupt_return @@ -901,7 +867,6 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x320) addi r3,r1,STACK_FRAME_OVERHEAD - INTS_RESTORE_HARD bl unknown_exception b interrupt_return @@ -911,7 +876,6 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x340) addi r3,r1,STACK_FRAME_OVERHEAD - INTS_RESTORE_HARD bl unknown_exception b interrupt_return @@ -991,7 +955,6 @@ alignment_more: addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) - INTS_RESTORE_HARD bl alignment_exception REST_NVGPRS(r1) b interrupt_return From ceff77efa4f8d9f02d8442171b325d3b7068fe5e Mon Sep 17 00:00:00 2001 From: 
Nicholas Piggin Date: Tue, 16 Mar 2021 20:42:01 +1000 Subject: [PATCH 224/302] powerpc/64e/interrupt: Use new interrupt context tracking scheme With the new interrupt exit code, context tracking can be managed more precisely, so remove the last of the 64e workarounds and switch to the new context tracking code already used by 64s. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316104206.407354-8-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 28 ---------------------------- arch/powerpc/kernel/interrupt.c | 16 +--------------- 2 files changed, 1 insertion(+), 43 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 104a77c00a315d..a2f551938e6489 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -21,9 +21,6 @@ static inline void nap_adjust_return(struct pt_regs *regs) } struct interrupt_state { -#ifdef CONFIG_PPC_BOOK3E_64 - enum ctx_state ctx_state; -#endif }; static inline void booke_restore_dbcr0(void) @@ -56,9 +53,7 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED) trace_hardirqs_off(); local_paca->irq_happened |= PACA_IRQ_HARD_DIS; -#endif -#ifdef CONFIG_PPC_BOOK3S_64 if (user_mode(regs)) { CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); @@ -75,12 +70,6 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup } #endif -#ifdef CONFIG_PPC_BOOK3E_64 - state->ctx_state = exception_enter(); - if (user_mode(regs)) - account_cpu_user_entry(); -#endif - booke_restore_dbcr0(); } @@ -100,25 +89,8 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup */ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { -#ifdef CONFIG_PPC_BOOK3E_64 - exception_exit(state->ctx_state); -#endif - if (user_mode(regs)) kuep_unlock(); - /* - * Book3S exits to user via interrupt_exit_user_prepare(), which does - * context tracking, which is a cleaner way to handle PREEMPT=y - * and avoid context entry/exit in e.g., preempt_schedule_irq()), - * which is likely to be where the core code wants to end up. - * - * The above comment explains why we can't do the - * - * if (user_mode(regs)) - * user_exit_irqoff(); - * - * sequence here. - */ } static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 381a618b5b5b06..1b0e1792ac37b5 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -235,10 +235,6 @@ static notrace void booke_load_dbcr0(void) #endif } -/* temporary hack for context tracking, removed in later patch */ -#include -asmlinkage __visible void __sched schedule_user(void); - /* * This should be called after a syscall returns, with r3 the return value * from the syscall. 
If this function returns non-zero, the system call @@ -296,11 +292,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); if (ti_flags & _TIF_NEED_RESCHED) { -#ifdef CONFIG_PPC_BOOK3E_64 - schedule_user(); -#else schedule(); -#endif } else { /* * SIGPENDING must restore signal handler function @@ -375,9 +367,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(!FULL_REGS(regs)); BUG_ON(arch_irq_disabled_regs(regs)); -#ifdef CONFIG_PPC_BOOK3S_64 CT_WARN_ON(ct_state() == CONTEXT_USER); -#endif /* * We don't need to restore AMR on the way back to userspace for KUAP. @@ -392,11 +382,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); /* returning to user: may enable */ if (ti_flags & _TIF_NEED_RESCHED) { -#ifdef CONFIG_PPC_BOOK3E_64 - schedule_user(); -#else schedule(); -#endif } else { if (ti_flags & _TIF_SIGPENDING) ret |= _TIF_RESTOREALL; @@ -464,7 +450,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. */ - if (IS_ENABLED(CONFIG_BOOKS) && TRAP(regs) != 0x700) + if (TRAP(regs) != 0x700) CT_WARN_ON(ct_state() == CONTEXT_USER); kuap = kuap_get_and_assert_locked(); From d738ee8d56de38c91610741f672ec5c1ffae76fc Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:42:02 +1000 Subject: [PATCH 225/302] powerpc/64e/interrupt: handle bad_page_fault in C With non-volatile registers saved on interrupt, bad_page_fault can now be called by do_page_fault. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316104206.407354-9-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64e.S | 6 ------ arch/powerpc/mm/fault.c | 5 +---- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 3c222a97f0232a..7c3654b0d0f47b 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -937,12 +937,6 @@ storage_fault_common: ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) bl do_page_fault - cmpdi r3,0 - bne- 1f - b interrupt_return - mr r4,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - bl __bad_page_fault b interrupt_return /* diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 0c0b1c2cfb49c8..18e588fda43db5 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -552,12 +552,9 @@ static long __do_page_fault(struct pt_regs *regs) if (likely(entry)) { instruction_pointer_set(regs, extable_fixup(entry)); return 0; - } else if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64)) { + } else { __bad_page_fault(regs, err); return 0; - } else { - /* 32 and 64e handle the bad page fault in asm */ - return err; } } NOKPROBE_SYMBOL(__do_page_fault); From c45ba4f44f6b9c98a5fc1511d8853ad6843c877b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:42:03 +1000 Subject: [PATCH 226/302] powerpc: clean up do_page_fault search_exception_tables + __bad_page_fault can be substituted with bad_page_fault, do_page_fault no longer needs to return a value to asm for any sub-architecture, and __bad_page_fault can be static. 
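The substitution assumes bad_page_fault() attempts the exception-table fixup itself before reporting the fault; a hedged sketch of that flow, matching what the removed caller code used to open-code:

  void bad_page_fault(struct pt_regs *regs, int sig)
  {
          const struct exception_table_entry *entry;

          /* a kernel access with a fixup handler just resumes at the fixup */
          entry = search_exception_tables(regs->nip);
          if (entry)
                  instruction_pointer_set(regs, extable_fixup(entry));
          else
                  __bad_page_fault(regs, sig);    /* otherwise report/oops */
  }
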
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316104206.407354-10-npiggin@gmail.com --- arch/powerpc/include/asm/bug.h | 5 +---- arch/powerpc/include/asm/interrupt.h | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 16 +++++++--------- arch/powerpc/mm/fault.c | 27 ++++++++------------------- 4 files changed, 17 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index d1635ffbb179e7..0b2162890d8b65 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -111,11 +111,8 @@ #ifndef __ASSEMBLY__ struct pt_regs; -long do_page_fault(struct pt_regs *); -long hash__do_page_fault(struct pt_regs *); +void hash__do_page_fault(struct pt_regs *); void bad_page_fault(struct pt_regs *, int); -void __bad_page_fault(struct pt_regs *regs, int sig); -void do_bad_page_fault_segv(struct pt_regs *regs); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index a2f551938e6489..b1b9919e04890d 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -444,7 +444,7 @@ DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault); DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); /* fault.c */ -DECLARE_INTERRUPT_HANDLER_RET(do_page_fault); +DECLARE_INTERRUPT_HANDLER(do_page_fault); DECLARE_INTERRUPT_HANDLER(do_bad_page_fault_segv); /* process.c */ diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 12de1906e97bc4..c1dace327e3961 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1583,10 +1583,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) { unsigned long dsisr = regs->dsisr; - long err; - if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) - goto page_fault; + if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) { + hash__do_page_fault(regs); + return 0; + } /* * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then @@ -1606,13 +1607,10 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) return 0; } - err = __do_hash_fault(regs); - if (err) { -page_fault: - err = hash__do_page_fault(regs); - } + if (__do_hash_fault(regs)) + hash__do_page_fault(regs); - return err; + return 0; } #ifdef CONFIG_PPC_MM_SLICES diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 18e588fda43db5..5227def84b5e03 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -539,36 +539,25 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, } NOKPROBE_SYMBOL(___do_page_fault); -static long __do_page_fault(struct pt_regs *regs) +static __always_inline void __do_page_fault(struct pt_regs *regs) { - const struct exception_table_entry *entry; long err; err = ___do_page_fault(regs, regs->dar, regs->dsisr); - if (likely(!err)) - return err; - - entry = search_exception_tables(regs->nip); - if (likely(entry)) { - instruction_pointer_set(regs, extable_fixup(entry)); - return 0; - } else { - __bad_page_fault(regs, err); - return 0; - } + if (unlikely(err)) + bad_page_fault(regs, err); } -NOKPROBE_SYMBOL(__do_page_fault); -DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) +DEFINE_INTERRUPT_HANDLER(do_page_fault) { - return __do_page_fault(regs); + __do_page_fault(regs); } 
#ifdef CONFIG_PPC_BOOK3S_64 /* Same as do_page_fault but interrupt entry has already run in do_hash_fault */ -long hash__do_page_fault(struct pt_regs *regs) +void hash__do_page_fault(struct pt_regs *regs) { - return __do_page_fault(regs); + __do_page_fault(regs); } NOKPROBE_SYMBOL(hash__do_page_fault); #endif @@ -578,7 +567,7 @@ NOKPROBE_SYMBOL(hash__do_page_fault); * It is called from the DSI and ISI handlers in head.S and from some * of the procedures in traps.c. */ -void __bad_page_fault(struct pt_regs *regs, int sig) +static void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); From 8dc7f0229b7892ccb23e19c9f30511c68cc0fdcc Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:42:04 +1000 Subject: [PATCH 227/302] powerpc: remove partial register save logic All subarchitectures always save all GPRs to pt_regs interrupt frames now. Remove FULL_REGS and associated bits. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316104206.407354-11-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 17 ++--------------- arch/powerpc/kernel/align.c | 6 ------ arch/powerpc/kernel/interrupt.c | 3 --- arch/powerpc/kernel/process.c | 12 ------------ arch/powerpc/kernel/ptrace/ptrace-view.c | 21 --------------------- arch/powerpc/kernel/ptrace/ptrace.c | 2 -- arch/powerpc/kernel/ptrace/ptrace32.c | 4 ---- arch/powerpc/kernel/signal_32.c | 3 --- arch/powerpc/kernel/signal_64.c | 2 -- arch/powerpc/kernel/traps.c | 1 - arch/powerpc/lib/sstep.c | 4 ---- arch/powerpc/xmon/xmon.c | 23 +++++++---------------- 12 files changed, 9 insertions(+), 89 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index c93511bf6b3bb3..7793d6bd2d7d0b 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -188,29 +188,16 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) #ifdef __powerpc64__ #define TRAP_FLAGS_MASK 0x10 #define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) -#define FULL_REGS(regs) true -#define SET_FULL_REGS(regs) do { } while (0) -#define CHECK_FULL_REGS(regs) do { } while (0) -#define NV_REG_POISON 0xdeadbeefdeadbeefUL #else /* - * We use the least-significant bit of the trap field to indicate - * whether we have saved the full set of registers, or only a - * partial set. A 1 there means the partial set. - * On 4xx we use the next bit to indicate whether the exception + * On 4xx we use bit 1 in the trap word to indicate whether the exception * is a critical exception (1 means it is). 
*/ -#define TRAP_FLAGS_MASK 0x1F +#define TRAP_FLAGS_MASK 0x1E #define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) -#define FULL_REGS(regs) true -#define SET_FULL_REGS(regs) do { } while (0) #define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0) #define IS_MCHECK_EXC(regs) (((regs)->trap & 4) != 0) #define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0) -#define NV_REG_POISON 0xdeadbeef -#define CHECK_FULL_REGS(regs) \ -do { \ -} while (0) #endif /* __powerpc64__ */ static __always_inline void set_trap(struct pt_regs *regs, unsigned long val) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index a97d5f1a390594..938db36864ddc2 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -304,12 +304,6 @@ int fix_alignment(struct pt_regs *regs) struct instruction_op op; int r, type; - /* - * We require a complete register set, if not, then our assembly - * is broken - */ - CHECK_FULL_REGS(regs); - if (is_kernel_addr(regs->nip)) r = probe_kernel_read_inst(&instr, (void *)regs->nip); else diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 1b0e1792ac37b5..b953bb5027e6af 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -51,7 +51,6 @@ notrace long system_call_exception(long r3, long r4, long r5, if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); - BUG_ON(!FULL_REGS(regs)); BUG_ON(arch_irq_disabled_regs(regs)); #ifdef CONFIG_PPC_PKEY @@ -365,7 +364,6 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); - BUG_ON(!FULL_REGS(regs)); BUG_ON(arch_irq_disabled_regs(regs)); CT_WARN_ON(ct_state() == CONTEXT_USER); @@ -445,7 +443,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign unlikely(!(regs->msr & MSR_RI))) unrecoverable_exception(regs); BUG_ON(regs->msr & MSR_PR); - BUG_ON(!FULL_REGS(regs)); /* * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b966c8e0ceadbc..5269a0d737ed37 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1448,11 +1448,9 @@ static void print_msr_bits(unsigned long val) #ifdef CONFIG_PPC64 #define REG "%016lx" #define REGS_PER_LINE 4 -#define LAST_VOLATILE 13 #else #define REG "%08lx" #define REGS_PER_LINE 8 -#define LAST_VOLATILE 12 #endif static void __show_regs(struct pt_regs *regs) @@ -1488,8 +1486,6 @@ static void __show_regs(struct pt_regs *regs) if ((i % REGS_PER_LINE) == 0) pr_cont("\nGPR%02d: ", i); pr_cont(REG " ", regs->gpr[i]); - if (i == LAST_VOLATILE && !FULL_REGS(regs)) - break; } pr_cont("\n"); /* @@ -1692,7 +1688,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, } else { /* user thread */ struct pt_regs *regs = current_pt_regs(); - CHECK_FULL_REGS(regs); *childregs = *regs; if (usp) childregs->gpr[1] = usp; @@ -1797,13 +1792,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) regs->ccr = 0; regs->gpr[1] = sp; - /* - * We have just cleared all the nonvolatile GPRs, so make - * FULL_REGS(regs) return true. This is necessary to allow - * ptrace to examine the thread immediately after exec. 
- */ - SET_FULL_REGS(regs); - #ifdef CONFIG_PPC32 regs->mq = 0; regs->nip = start; diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 0923c94f684e96..48ff9121e9c6e2 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -221,17 +221,9 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, #ifdef CONFIG_PPC64 struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe)); #endif - int i; - if (target->thread.regs == NULL) return -EIO; - if (!FULL_REGS(target->thread.regs)) { - /* We have a partial register set. Fill 14-31 with bogus values */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } - membuf_write(&to, target->thread.regs, sizeof(struct user_pt_regs)); membuf_store(&to_msr, get_user_msr(target)); @@ -252,8 +244,6 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, if (target->thread.regs == NULL) return -EIO; - CHECK_FULL_REGS(target->thread.regs); - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, target->thread.regs, 0, PT_MSR * sizeof(reg)); @@ -729,19 +719,9 @@ static int gpr32_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - int i; - if (target->thread.regs == NULL) return -EIO; - if (!FULL_REGS(target->thread.regs)) { - /* - * We have a partial register set. - * Fill 14-31 with bogus values. - */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } return gpr32_get_common(target, regset, to, &target->thread.regs->gpr[0]); } @@ -754,7 +734,6 @@ static int gpr32_set(struct task_struct *target, if (target->thread.regs == NULL) return -EIO; - CHECK_FULL_REGS(target->thread.regs); return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, &target->thread.regs->gpr[0]); } diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 51801777906c29..0a0a33eb0d2825 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -59,7 +59,6 @@ long arch_ptrace(struct task_struct *child, long request, if ((addr & (sizeof(long) - 1)) || !child->thread.regs) break; - CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) ret = ptrace_get_reg(child, (int) index, &tmp); else @@ -81,7 +80,6 @@ long arch_ptrace(struct task_struct *child, long request, if ((addr & (sizeof(long) - 1)) || !child->thread.regs) break; - CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) ret = ptrace_put_reg(child, index, data); else diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c index d30b9ad70edc28..19c22480898213 100644 --- a/arch/powerpc/kernel/ptrace/ptrace32.c +++ b/arch/powerpc/kernel/ptrace/ptrace32.c @@ -83,7 +83,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if ((addr & 3) || (index > PT_FPSCR32)) break; - CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) { ret = ptrace_get_reg(child, index, &tmp); if (ret) @@ -133,7 +132,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if ((addr & 3) || numReg > PT_FPSCR) break; - CHECK_FULL_REGS(child->thread.regs); if (numReg >= PT_FPR0) { flush_fp_to_thread(child); /* get 64 bit FPR */ @@ -187,7 +185,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if ((addr & 3) || (index > PT_FPSCR32)) break; - CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) { ret = ptrace_put_reg(child, 
index, data); } else { @@ -226,7 +223,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, */ if ((addr & 3) || (numReg > PT_FPSCR)) break; - CHECK_FULL_REGS(child->thread.regs); if (numReg < PT_FPR0) { unsigned long freg; ret = ptrace_get_reg(child, numReg, &freg); diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index fff4adc5a2b092..94442af383e1f6 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -94,8 +94,6 @@ __unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int val, i; - WARN_ON(!FULL_REGS(regs)); - for (i = 0; i <= PT_RESULT; i ++) { /* Force usr to alway see softe as 1 (interrupts enabled) */ if (i == PT_SOFTE) @@ -147,7 +145,6 @@ __unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr) static __always_inline int __unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { - WARN_ON(!FULL_REGS(regs)); unsafe_copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE, failed); return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index e10459f11f8e03..dca66481d0c21d 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -172,7 +172,6 @@ static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc, } #endif /* CONFIG_VSX */ unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out); - WARN_ON(!FULL_REGS(regs)); unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out); unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out); unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out); @@ -309,7 +308,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, err |= __put_user(&sc->gp_regs, &sc->regs); err |= __put_user(&tm_sc->gp_regs, &tm_sc->regs); - WARN_ON(!FULL_REGS(regs)); err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE); err |= __copy_to_user(&sc->gp_regs, &tsk->thread.ckpt_regs, GP_REGS_SIZE); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index fd965cbe07d86c..2babed7a6a2999 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1318,7 +1318,6 @@ static int emulate_instruction(struct pt_regs *regs) if (!user_mode(regs)) return -EINVAL; - CHECK_FULL_REGS(regs); if (get_user(instword, (u32 __user *)(regs->nip))) return -EFAULT; diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 739ea6dc461c3a..45bda252075577 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1401,10 +1401,6 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; } - /* Following cases refer to regs->gpr[], so we need all regs */ - if (!FULL_REGS(regs)) - return -1; - rd = (word >> 21) & 0x1f; ra = (word >> 16) & 0x1f; rb = (word >> 11) & 0x1f; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 2e94647c87118b..361534f67082cc 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1815,25 +1815,16 @@ static void prregs(struct pt_regs *fp) } #ifdef CONFIG_PPC64 - if (FULL_REGS(fp)) { - for (n = 0; n < 16; ++n) - printf("R%.2d = "REG" R%.2d = "REG"\n", - n, fp->gpr[n], n+16, fp->gpr[n+16]); - } else { - for (n = 0; n < 7; ++n) - printf("R%.2d = "REG" R%.2d = "REG"\n", - n, fp->gpr[n], n+7, fp->gpr[n+7]); - } +#define R_PER_LINE 2 #else +#define R_PER_LINE 4 +#endif + for (n = 0; n < 32; ++n) { - printf("R%.2d = %.8lx%s", n, fp->gpr[n], - (n & 3) == 3? 
"\n": " "); - if (n == 12 && !FULL_REGS(fp)) { - printf("\n"); - break; - } + printf("R%.2d = "REG"%s", n, fp->gpr[n], + (n % R_PER_LINE) == R_PER_LINE - 1 ? "\n" : " "); } -#endif + printf("pc = "); xmon_print_symbol(fp->nip, " ", "\n"); if (!trap_is_syscall(fp) && cpu_has_feature(CPU_FTR_CFAR)) { From 8f6cc75a97d162011fad3c470e5a14e298383a07 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:42:05 +1000 Subject: [PATCH 228/302] powerpc: move norestart trap flag to bit 0 Compact the trap flags down to use the low 4 bits of regs.trap. A few 64e interrupt trap numbers set bit 4. Although they tended to be trivial so it wasn't a real problem[1], it is not the right thing to do, and confusing. [*] E.g., 0x310 hypercall goes to unknown_exception, which prints regs->trap directly so 0x310 will appear fine, and only the syscall interrupt will test norestart, so it won't be confused by 0x310. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316104206.407354-12-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 7793d6bd2d7d0b..9c9ab27461683b 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -185,20 +185,25 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) #define current_pt_regs() \ ((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1) +/* + * The 4 low bits (0xf) are available as flags to overload the trap word, + * because interrupt vectors have minimum alignment of 0x10. TRAP_FLAGS_MASK + * must cover the bits used as flags, including bit 0 which is used as the + * "norestart" bit. + */ #ifdef __powerpc64__ -#define TRAP_FLAGS_MASK 0x10 -#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) +#define TRAP_FLAGS_MASK 0x1 #else /* * On 4xx we use bit 1 in the trap word to indicate whether the exception * is a critical exception (1 means it is). */ -#define TRAP_FLAGS_MASK 0x1E -#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) +#define TRAP_FLAGS_MASK 0xf #define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0) #define IS_MCHECK_EXC(regs) (((regs)->trap & 4) != 0) #define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0) #endif /* __powerpc64__ */ +#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) static __always_inline void set_trap(struct pt_regs *regs, unsigned long val) { @@ -222,12 +227,12 @@ static inline bool trap_is_syscall(struct pt_regs *regs) static inline bool trap_norestart(struct pt_regs *regs) { - return regs->trap & 0x10; + return regs->trap & 0x1; } static __always_inline void set_trap_norestart(struct pt_regs *regs) { - regs->trap |= 0x10; + regs->trap |= 0x1; } #define arch_has_single_step() (1) From 58efe9f696cf908f40d6672aeca81cb2ad2bc762 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 31 Mar 2021 16:48:44 +0000 Subject: [PATCH 229/302] lib/vdso: Mark do_hres_timens() and do_coarse_timens() __always_inline() In the same spirit as commit c966533f8c6c ("lib/vdso: Mark do_hres() and do_coarse() as __always_inline"), mark do_hres_timens() and do_coarse_timens() __always_inline. The measurement below in on a non timens process, ie on the fastest path. 
On powerpc32, without the patch: clock-gettime-monotonic-raw: vdso: 1155 nsec/call clock-gettime-monotonic-coarse: vdso: 813 nsec/call clock-gettime-monotonic: vdso: 1076 nsec/call With the patch: clock-gettime-monotonic-raw: vdso: 1100 nsec/call clock-gettime-monotonic-coarse: vdso: 667 nsec/call clock-gettime-monotonic: vdso: 1025 nsec/call Signed-off-by: Christophe Leroy Reviewed-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/90dcf45ebadfd5a07f24241551c62f619d1cb930.1617209142.git.christophe.leroy@csgroup.eu --- lib/vdso/gettimeofday.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 2919f169814041..c6f6dee0874601 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -46,8 +46,8 @@ static inline bool vdso_cycles_ok(u64 cycles) #endif #ifdef CONFIG_TIME_NS -static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) { const struct vdso_data *vd = __arch_get_timens_vdso_data(); const struct timens_offset *offs = &vdns->offset[clk]; @@ -97,8 +97,8 @@ static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) return NULL; } -static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) { return -EINVAL; } @@ -159,8 +159,8 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, } #ifdef CONFIG_TIME_NS -static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) { const struct vdso_data *vd = __arch_get_timens_vdso_data(); const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; @@ -188,8 +188,8 @@ static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, return 0; } #else -static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) { return -1; } From 808094fcbf4196be0feb17afbbdc182ec95c8cec Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 31 Mar 2021 16:48:45 +0000 Subject: [PATCH 230/302] lib/vdso: Add vdso_data pointer as input to __arch_get_timens_vdso_data() For the same reason as commit e876f0b69dc9 ("lib/vdso: Allow architectures to provide the vdso data pointer"), powerpc wants to avoid calculation of relative position to code. As the timens_vdso_data is next page to vdso_data, provide vdso_data pointer to __arch_get_timens_vdso_data() in order to ease the calculation on powerpc in following patches. 
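The caller-side subtlety is that do_hres_timens() can be handed the CS_RAW element of the vdso_data array, so the pointer is normalised back to the start of the array before the architecture helper sees it; a sketch of that step (the wrapper name is illustrative, the arithmetic matches the hunk below):

  static __always_inline
  const struct vdso_data *timens_data_for(const struct vdso_data *vdns, clockid_t clk)
  {
          const struct vdso_data *vd;

          /* step back to the start of the vdso_data array ... */
          vd = vdns - (clk == CLOCK_MONOTONIC_RAW ? CS_RAW : CS_HRES_COARSE);

          /* ... so the arch helper always receives the same base pointer */
          return __arch_get_timens_vdso_data(vd);
  }
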
Signed-off-by: Christophe Leroy Reviewed-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Acked-by: Andrei Vagin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/539c4204b1baa77c55f758904a1ea239abbc7a5c.1617209142.git.christophe.leroy@csgroup.eu --- arch/arm64/include/asm/vdso/compat_gettimeofday.h | 3 ++- arch/arm64/include/asm/vdso/gettimeofday.h | 2 +- arch/s390/include/asm/vdso/gettimeofday.h | 3 ++- arch/x86/include/asm/vdso/gettimeofday.h | 3 ++- lib/vdso/gettimeofday.c | 15 +++++++++------ 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 7508b0ac1d21d5..ecb6fd4c3c647c 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -155,7 +155,8 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) } #ifdef CONFIG_TIME_NS -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { const struct vdso_data *ret; diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 631ab12816335f..de86230a943642 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -100,7 +100,7 @@ const struct vdso_data *__arch_get_vdso_data(void) #ifdef CONFIG_TIME_NS static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(void) +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return _timens_data; } diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index ed89ef74253069..383c53c3dddd55 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -68,7 +68,8 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) } #ifdef CONFIG_TIME_NS -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return _timens_data; } diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index df01d7349d7991..1936f21ed8cdaa 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -58,7 +58,8 @@ extern struct ms_hyperv_tsc_page hvclock_page #endif #ifdef CONFIG_TIME_NS -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return __timens_vdso_data; } diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index c6f6dee0874601..ce2f6955200321 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -49,13 +49,15 @@ static inline bool vdso_cycles_ok(u64 cycles) static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, struct __kernel_timespec *ts) { - const struct vdso_data *vd = __arch_get_timens_vdso_data(); + const struct vdso_data *vd; const struct timens_offset *offs = &vdns->offset[clk]; const struct vdso_timestamp *vdso_ts; u64 cycles, last, ns; u32 seq; s64 sec; + vd = vdns - (clk == CLOCK_MONOTONIC_RAW ? 
CS_RAW : CS_HRES_COARSE); + vd = __arch_get_timens_vdso_data(vd); if (clk != CLOCK_MONOTONIC_RAW) vd = &vd[CS_HRES_COARSE]; else @@ -92,7 +94,8 @@ static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_ return 0; } #else -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return NULL; } @@ -162,7 +165,7 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, struct __kernel_timespec *ts) { - const struct vdso_data *vd = __arch_get_timens_vdso_data(); + const struct vdso_data *vd = __arch_get_timens_vdso_data(vdns); const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; const struct timens_offset *offs = &vdns->offset[clk]; u64 nsec; @@ -310,7 +313,7 @@ __cvdso_gettimeofday_data(const struct vdso_data *vd, if (unlikely(tz != NULL)) { if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - vd = __arch_get_timens_vdso_data(); + vd = __arch_get_timens_vdso_data(vd); tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest; tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime; @@ -333,7 +336,7 @@ __cvdso_time_data(const struct vdso_data *vd, __kernel_old_time_t *time) if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - vd = __arch_get_timens_vdso_data(); + vd = __arch_get_timens_vdso_data(vd); t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec); @@ -363,7 +366,7 @@ int __cvdso_clock_getres_common(const struct vdso_data *vd, clockid_t clock, if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - vd = __arch_get_timens_vdso_data(); + vd = __arch_get_timens_vdso_data(vd); /* * Convert the clockid to a bitmask and use it to check which From 1c4bce6753857dc409a0197342d18764e7f4b741 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 31 Mar 2021 16:48:46 +0000 Subject: [PATCH 231/302] powerpc/vdso: Separate vvar vma from vdso Since commit 511157ab641e ("powerpc/vdso: Move vdso datapage up front") VVAR page is in front of the VDSO area. In result it breaks CRIU (Checkpoint Restore In Userspace) [1], where CRIU expects that "[vdso]" from /proc/../maps points at ELF/vdso image, rather than at VVAR data page. Laurent made a patch to keep CRIU working (by reading aux vector). But I think it still makes sence to separate two mappings into different VMAs. It will also make ppc64 less "special" for userspace and as a side-bonus will make VVAR page un-writable by debugger (which previously would COW page and can be unexpected). I opportunistically Cc stable on it: I understand that usually such stuff isn't a stable material, but that will allow us in CRIU have one workaround less that is needed just for one release (v5.11) on one platform (ppc64), which we otherwise have to maintain. I wouldn't go as far as to say that the commit 511157ab641e is ABI regression as no other userspace got broken, but I'd really appreciate if it gets backported to v5.11 after v5.12 is released, so as not to complicate already non-simple CRIU-vdso code. Thanks! [1]: https://github.com/checkpoint-restore/criu/issues/1417 Cc: stable@vger.kernel.org # v5.11 Signed-off-by: Dmitry Safonov Signed-off-by: Christophe Leroy Tested-by: Christophe Leroy Reviewed-by: Vincenzo Frascino # vDSO parts. 
Acked-by: Andrei Vagin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f401eb1ebc0bfc4d8f0e10dc8e525fd409eb68e2.1617209142.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mmu_context.h | 2 +- arch/powerpc/kernel/vdso.c | 54 +++++++++++++++++++------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 652ce85f9410a8..4bc45d3ed8b0e8 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -263,7 +263,7 @@ extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { - unsigned long vdso_base = (unsigned long)mm->context.vdso - PAGE_SIZE; + unsigned long vdso_base = (unsigned long)mm->context.vdso; if (start <= vdso_base && vdso_base < end) mm->context.vdso = NULL; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index e839a906fdf230..b14907209822e6 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -55,10 +55,10 @@ static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struc { unsigned long new_size = new_vma->vm_end - new_vma->vm_start; - if (new_size != text_size + PAGE_SIZE) + if (new_size != text_size) return -EINVAL; - current->mm->context.vdso = (void __user *)new_vma->vm_start + PAGE_SIZE; + current->mm->context.vdso = (void __user *)new_vma->vm_start; return 0; } @@ -73,6 +73,10 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start); } +static struct vm_special_mapping vvar_spec __ro_after_init = { + .name = "[vvar]", +}; + static struct vm_special_mapping vdso32_spec __ro_after_init = { .name = "[vdso]", .mremap = vdso32_mremap, @@ -89,11 +93,11 @@ static struct vm_special_mapping vdso64_spec __ro_after_init = { */ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { - struct mm_struct *mm = current->mm; + unsigned long vdso_size, vdso_base, mappings_size; struct vm_special_mapping *vdso_spec; + unsigned long vvar_size = PAGE_SIZE; + struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long vdso_size; - unsigned long vdso_base; if (is_32bit_task()) { vdso_spec = &vdso32_spec; @@ -110,8 +114,8 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int vdso_base = 0; } - /* Add a page to the vdso size for the data page */ - vdso_size += PAGE_SIZE; + mappings_size = vdso_size + vvar_size; + mappings_size += (VDSO_ALIGNMENT - 1) & PAGE_MASK; /* * pick a base address for the vDSO in process space. We try to put it @@ -119,9 +123,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int * and end up putting it elsewhere. * Add enough to the size so that the result can be aligned. */ - vdso_base = get_unmapped_area(NULL, vdso_base, - vdso_size + ((VDSO_ALIGNMENT - 1) & PAGE_MASK), - 0, 0); + vdso_base = get_unmapped_area(NULL, vdso_base, mappings_size, 0, 0); if (IS_ERR_VALUE(vdso_base)) return vdso_base; @@ -133,7 +135,13 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int * install_special_mapping or the perf counter mmap tracking code * will fail to recognise it as a vDSO. 
*/ - mm->context.vdso = (void __user *)vdso_base + PAGE_SIZE; + mm->context.vdso = (void __user *)vdso_base + vvar_size; + + vma = _install_special_mapping(mm, vdso_base, vvar_size, + VM_READ | VM_MAYREAD | VM_IO | + VM_DONTDUMP | VM_PFNMAP, &vvar_spec); + if (IS_ERR(vma)) + return PTR_ERR(vma); /* * our vma flags don't have VM_WRITE so by default, the process isn't @@ -145,9 +153,12 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int * It's fine to use that for setting breakpoints in the vDSO code * pages though. */ - vma = _install_special_mapping(mm, vdso_base, vdso_size, + vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, vdso_spec); + if (IS_ERR(vma)) + do_munmap(mm, vdso_base, vvar_size, NULL); + return PTR_ERR_OR_ZERO(vma); } @@ -249,11 +260,22 @@ static struct page ** __init vdso_setup_pages(void *start, void *end) if (!pagelist) panic("%s: Cannot allocate page list for VDSO", __func__); - pagelist[0] = virt_to_page(vdso_data); - for (i = 0; i < pages; i++) - pagelist[i + 1] = virt_to_page(start + i * PAGE_SIZE); + pagelist[i] = virt_to_page(start + i * PAGE_SIZE); + + return pagelist; +} + +static struct page ** __init vvar_setup_pages(void) +{ + struct page **pagelist; + /* .pages is NULL-terminated */ + pagelist = kcalloc(2, sizeof(struct page *), GFP_KERNEL); + if (!pagelist) + panic("%s: Cannot allocate page list for VVAR", __func__); + + pagelist[0] = virt_to_page(vdso_data); return pagelist; } @@ -295,6 +317,8 @@ static int __init vdso_init(void) if (IS_ENABLED(CONFIG_PPC64)) vdso64_spec.pages = vdso_setup_pages(&vdso64_start, &vdso64_end); + vvar_spec.pages = vvar_setup_pages(); + smp_wmb(); return 0; From 74205b3fc2effde821b219d955c70e727dc43cc6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 31 Mar 2021 16:48:47 +0000 Subject: [PATCH 232/302] powerpc/vdso: Add support for time namespaces This patch adds the necessary glue to provide time namespaces. Things are mainly copied from ARM64. __arch_get_timens_vdso_data() calculates timens vdso data position based on the vdso data position, knowing it is the next page in vvar. This avoids having to redo the mflr/bcl/mflr/mtlr dance to locate the page relative to running code position. 
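For reference, the helper this adds (visible in the gettimeofday.h hunk further below) boils down to a single page offset from the vdso data pointer it is handed:

    static __always_inline
    const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
    {
            return (void *)vd + PAGE_SIZE;
    }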
Signed-off-by: Christophe Leroy Reviewed-by: Vincenzo Frascino # vDSO parts Acked-by: Andrei Vagin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1a15495f80ec19a87b16cf874dbf7c3fa5ec40fe.1617209142.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 3 +- arch/powerpc/include/asm/vdso/gettimeofday.h | 10 ++ arch/powerpc/include/asm/vdso_datapage.h | 2 - arch/powerpc/kernel/vdso.c | 116 ++++++++++++++++--- arch/powerpc/kernel/vdso32/vdso32.lds.S | 2 +- arch/powerpc/kernel/vdso64/vdso64.lds.S | 2 +- 6 files changed, 114 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7c5c72cbf19f0c..475d77a6ebbe64 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -173,6 +173,7 @@ config PPC select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC select GENERIC_EARLY_IOREMAP + select GENERIC_GETTIMEOFDAY select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP if PCI @@ -180,7 +181,7 @@ config PPC select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL - select GENERIC_GETTIMEOFDAY + select GENERIC_VDSO_TIME_NS select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 77c635c2c90d43..1faff0be1111bd 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H #define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H +#include + #ifdef __ASSEMBLY__ #include @@ -154,6 +156,14 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *__arch_get_vdso_data(void); +#ifdef CONFIG_TIME_NS +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) +{ + return (void *)vd + PAGE_SIZE; +} +#endif + static inline bool vdso_clocksource_ok(const struct vdso_data *vd) { return true; diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index 3f958ecf2beb56..a585c8e538ff0f 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -107,9 +107,7 @@ extern struct vdso_arch_data *vdso_data; bcl 20, 31, .+4 999: mflr \ptr -#if CONFIG_PPC_PAGE_SHIFT > 14 addis \ptr, \ptr, (_vdso_datapage - 999b)@ha -#endif addi \ptr, \ptr, (_vdso_datapage - 999b)@l .endm diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index b14907209822e6..717f2c9a7573cf 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,12 @@ static union { } vdso_data_store __page_aligned_data; struct vdso_arch_data *vdso_data = &vdso_data_store.data; +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES, +}; + static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, unsigned long text_size) { @@ -73,8 +80,12 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start); } +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf); + static struct vm_special_mapping vvar_spec __ro_after_init = { .name = "[vvar]", + .fault = vvar_fault, }; static struct 
vm_special_mapping vdso32_spec __ro_after_init = { @@ -87,6 +98,94 @@ static struct vm_special_mapping vdso64_spec __ro_after_init = { .mremap = vdso64_mremap, }; +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return ((struct vdso_arch_data *)vvar_page)->data; +} + +/* + * The vvar mapping contains data for a specific time namespace, so when a task + * changes namespace we must unmap its vvar data for the old namespace. + * Subsequent faults will map in data for the new namespace. + * + * For more details see timens_setup_vdso_data(). + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + + mmap_read_lock(mm); + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long size = vma->vm_end - vma->vm_start; + + if (vma_is_special_mapping(vma, &vvar_spec)) + zap_page_range(vma, vma->vm_start, size); + } + + mmap_read_unlock(mm); + return 0; +} + +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops. + * For more details check_vma_flags() and __access_remote_vm() + */ + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} +#else +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *timens_page = find_timens_vvar_page(vma); + unsigned long pfn; + + switch (vmf->pgoff) { + case VVAR_DATA_PAGE_OFFSET: + if (timens_page) + pfn = page_to_pfn(timens_page); + else + pfn = virt_to_pfn(vdso_data); + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET + * offset. + * See also the comment near timens_setup_vdso_data(). 
+ */ + if (!timens_page) + return VM_FAULT_SIGBUS; + pfn = virt_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + default: + return VM_FAULT_SIGBUS; + } + + return vmf_insert_pfn(vma, vmf->address, pfn); +} + /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree @@ -95,7 +194,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int { unsigned long vdso_size, vdso_base, mappings_size; struct vm_special_mapping *vdso_spec; - unsigned long vvar_size = PAGE_SIZE; + unsigned long vvar_size = VVAR_NR_PAGES * PAGE_SIZE; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -266,19 +365,6 @@ static struct page ** __init vdso_setup_pages(void *start, void *end) return pagelist; } -static struct page ** __init vvar_setup_pages(void) -{ - struct page **pagelist; - - /* .pages is NULL-terminated */ - pagelist = kcalloc(2, sizeof(struct page *), GFP_KERNEL); - if (!pagelist) - panic("%s: Cannot allocate page list for VVAR", __func__); - - pagelist[0] = virt_to_page(vdso_data); - return pagelist; -} - static int __init vdso_init(void) { #ifdef CONFIG_PPC64 @@ -317,8 +403,6 @@ static int __init vdso_init(void) if (IS_ENABLED(CONFIG_PPC64)) vdso64_spec.pages = vdso_setup_pages(&vdso64_start, &vdso64_end); - vvar_spec.pages = vvar_setup_pages(); - smp_wmb(); return 0; diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index a4b806b0d618a7..58e0099f70f42e 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -17,7 +17,7 @@ ENTRY(_start) SECTIONS { - PROVIDE(_vdso_datapage = . - PAGE_SIZE); + PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 2f3c359cacd3a8..0288cad428b039 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -17,7 +17,7 @@ ENTRY(_start) SECTIONS { - PROVIDE(_vdso_datapage = . - PAGE_SIZE); + PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); . = SIZEOF_HEADERS; .hash : { *(.hash) } :text From 7098f8f0cf0387443fd8702f24a8a2521d5133f3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 13 Apr 2021 23:54:27 +1000 Subject: [PATCH 233/302] powerpc/mm/radix: Make radix__change_memory_range() static The lkp bot pointed out that with W=1 we get: arch/powerpc/mm/book3s64/radix_pgtable.c:183:6: error: no previous prototype for 'radix__change_memory_range' Which is really saying that it could be static, make it so. 
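For context, the W=1 warning comes from -Wmissing-prototypes: a function with external linkage is expected to have a prototype visible in some header, while a static (file-local) definition is not. A minimal illustration, not taken from this patch:

    /* before: external linkage, no declaration in any header -> "no previous prototype" */
    void helper(void) { }

    /* after: internal linkage, the warning goes away */
    static void helper(void) { }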
Reported-by: kernel test robot Signed-off-by: Michael Ellerman --- arch/powerpc/mm/book3s64/radix_pgtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 55f26c0e389eb2..50d536ecc89b1a 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -180,8 +180,8 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, } #ifdef CONFIG_STRICT_KERNEL_RWX -void radix__change_memory_range(unsigned long start, unsigned long end, - unsigned long clear) +static void radix__change_memory_range(unsigned long start, unsigned long end, + unsigned long clear) { unsigned long idx; pgd_t *pgdp; From d6481a7195df4a8c828f9ee0b382f2dd36d3575c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 16 Apr 2021 21:05:47 +1000 Subject: [PATCH 234/302] powerpc/configs: Add PAPR_SCM to pseries_defconfig This is a pseries only driver, it should be built by default as part of pseries_defconfig to get some build coverage. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210416111209.765444-1-mpe@ellerman.id.au --- arch/powerpc/configs/pseries_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 777221775c8398..968095d7682c59 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -41,6 +41,7 @@ CONFIG_DTL=y CONFIG_SCANLOG=m CONFIG_PPC_SMLPAR=y CONFIG_IBMEBUS=y +CONFIG_PAPR_SCM=m CONFIG_PPC_SVM=y # CONFIG_PPC_PMAC is not set CONFIG_RTAS_FLASH=m From 7767d9ac89cee29c68f5dd278b3bb411d1c69287 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 16 Apr 2021 21:07:06 +1000 Subject: [PATCH 235/302] powerpc/papr_scm: Fix build error due to wrong printf specifier When I changed the rc variable to be long rather than int64_t I neglected to update the printk(), leading to a build break: arch/powerpc/platforms/pseries/papr_scm.c: In function 'papr_scm_pmem_flush': arch/powerpc/platforms/pseries/papr_scm.c:144:26: warning: format '%lld' expects argument of type 'long long int', but argument 3 has type 'long int' [-Wformat=] Fixes: 75b7c05ebf90 ("powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall") Reported-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210416111209.765444-2-mpe@ellerman.id.au --- arch/powerpc/platforms/pseries/papr_scm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index ae6f5d80d5ceb9..48de2190211679 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -141,7 +141,7 @@ static int papr_scm_pmem_flush(struct nd_region *nd_region, } while (rc == H_BUSY); if (rc) { - dev_err(&p->pdev->dev, "flush error: %lld", rc); + dev_err(&p->pdev->dev, "flush error: %ld", rc); rc = -EIO; } else { dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index); From 0751fdf280416847d31d9b7276e4afc614fc6e15 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 16 Apr 2021 21:38:04 +1000 Subject: [PATCH 236/302] macintosh/via-pmu: Fix build warning Now that __fake_sleep is static, we get a warning about it being unused in some configurations: drivers/macintosh/via-pmu.c:190:12: warning: '__fake_sleep' defined but not used 190 | static int __fake_sleep; Move it inside the ifdef where it's used to avoid 
the warning. Fixes: 95d143923379 ("macintosh/via-pmu: Make some symbols static") Reported-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210416114139.772236-1-mpe@ellerman.id.au --- drivers/macintosh/via-pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 478766434919ba..4bdd4c45e7a724 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -187,7 +187,6 @@ static int query_batt_timer = BATTERY_POLLING_COUNT; static struct adb_request batt_req; static struct proc_dir_entry *proc_pmu_batt[PMU_MAX_BATTERIES]; -static int __fake_sleep; int asleep; #ifdef CONFIG_ADB @@ -1833,6 +1832,7 @@ pmu_present(void) */ static u32 save_via[8]; +static int __fake_sleep; static void save_via_state(void) From 38d0b1c9cec71e6d0f3bddef0bbce41d05a3e796 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Thu, 11 Feb 2021 12:24:35 -0600 Subject: [PATCH 237/302] powerpc/pseries: extract host bridge from pci_bus prior to bus removal The pci_bus->bridge reference may no longer be valid after pci_bus_remove() resulting in passing a bad value to device_unregister() for the associated bridge device. Store the host_bridge reference in a separate variable prior to pci_bus_remove(). Fixes: 7340056567e3 ("powerpc/pci: Reorder pci bus/bridge unregistration during PHB removal") Signed-off-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210211182435.47968-1-tyreld@linux.ibm.com --- arch/powerpc/platforms/pseries/pci_dlpar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index f9ae17e8a0f468..a8f9140a24fa3c 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -50,6 +50,7 @@ EXPORT_SYMBOL_GPL(init_phb_dynamic); int remove_phb_dynamic(struct pci_controller *phb) { struct pci_bus *b = phb->bus; + struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge); struct resource *res; int rc, i; @@ -76,7 +77,8 @@ int remove_phb_dynamic(struct pci_controller *phb) /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); - device_unregister(b->bridge); + host_bridge->bus = NULL; + device_unregister(&host_bridge->dev); /* Now release the IO resource */ if (res->flags & IORESOURCE_IO) From e9e16917bc388846163b8566a298a291d71e44c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 13 Apr 2021 15:03:52 +0200 Subject: [PATCH 238/302] powerpc/xive: Use the "ibm, chip-id" property only under PowerNV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'chip_id' field of the XIVE CPU structure is used to choose a target for a source located on the same chip. For that, the XIVE driver queries the chip identifier from the "ibm,chip-id" property and compares it to a 'src_chip' field identifying the chip of a source. This information is only available on the PowerNV platform, 'src_chip' being assigned to XIVE_INVALID_CHIP_ID under pSeries. The "ibm,chip-id" property is also not available on all platforms. It was first introduced on PowerNV and later, under QEMU for pSeries/KVM. However, the property is not part of PAPR and does not exist under pSeries/PowerVM. 
Assign 'chip_id' to XIVE_INVALID_CHIP_ID by default and let the PowerNV platform override the value with the "ibm,chip-id" property. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210413130352.1183267-1-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 9 +++------ arch/powerpc/sysdev/xive/native.c | 6 ++++++ arch/powerpc/sysdev/xive/xive-internal.h | 1 + 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index b025f42bf1f350..50469700dec6a6 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1413,17 +1413,14 @@ static int xive_prepare_cpu(unsigned int cpu) xc = per_cpu(xive_cpu, cpu); if (!xc) { - struct device_node *np; - xc = kzalloc_node(sizeof(struct xive_cpu), GFP_KERNEL, cpu_to_node(cpu)); if (!xc) return -ENOMEM; - np = of_get_cpu_node(cpu, NULL); - if (np) - xc->chip_id = of_get_ibm_chip_id(np); - of_node_put(np); xc->hw_ipi = XIVE_BAD_IRQ; + xc->chip_id = XIVE_INVALID_CHIP_ID; + if (xive_ops->prepare_cpu) + xive_ops->prepare_cpu(cpu, xc); per_cpu(xive_cpu, cpu) = xc; } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 05a800a3104edd..57e3f154043546 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -380,6 +380,11 @@ static void xive_native_update_pending(struct xive_cpu *xc) } } +static void xive_native_prepare_cpu(unsigned int cpu, struct xive_cpu *xc) +{ + xc->chip_id = cpu_to_chip_id(cpu); +} + static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) { s64 rc; @@ -462,6 +467,7 @@ static const struct xive_ops xive_native_ops = { .match = xive_native_match, .shutdown = xive_native_shutdown, .update_pending = xive_native_update_pending, + .prepare_cpu = xive_native_prepare_cpu, .setup_cpu = xive_native_setup_cpu, .teardown_cpu = xive_native_teardown_cpu, .sync_source = xive_native_sync_source, diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index b3a456fdd3a532..504e7edce35812 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -44,6 +44,7 @@ struct xive_ops { u32 *sw_irq); int (*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio); void (*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio); + void (*prepare_cpu)(unsigned int cpu, struct xive_cpu *xc); void (*setup_cpu)(unsigned int cpu, struct xive_cpu *xc); void (*teardown_cpu)(unsigned int cpu, struct xive_cpu *xc); bool (*match)(struct device_node *np); From c47f892d7aa62765bf0689073f75990b4517a4cf Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 15 Apr 2021 17:39:32 +0530 Subject: [PATCH 239/302] powerpc/smp: Reintroduce cpu_core_mask Daniel reported that with Commit 4ca234a9cbd7 ("powerpc/smp: Stop updating cpu_core_mask") QEMU was unable to set single NUMA node SMP topologies such as: -smp 8,maxcpus=8,cores=2,threads=2,sockets=2 i.e he expected 2 sockets in one NUMA node. The above commit helped to reduce boot time on Large Systems for example 4096 vCPU single socket QEMU instance. PAPR is silent on having more than one socket within a NUMA node. cpu_core_mask and cpu_cpu_mask for any CPU would be same unless the number of sockets is different from the number of NUMA nodes. One option is to reintroduce cpu_core_mask but use a slightly different method to arrive at the cpu_core_mask. 
Previously each CPU's chip-id would be compared with all other CPU's chip-id to verify if both the CPUs were related at the chip level. Now if a CPU 'A' is found related / (unrelated) to another CPU 'B', all the thread siblings of 'A' and thread siblings of 'B' are automatically marked as related / (unrelated). Also if a platform doesn't support ibm,chip-id property, i.e its cpu_to_chip_id returns -1, cpu_core_map holds a copy of cpu_cpu_mask(). Fixes: 4ca234a9cbd7 ("powerpc/smp: Stop updating cpu_core_mask") Reported-by: Daniel Henrique Barboza Signed-off-by: Srikar Dronamraju Tested-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210415120934.232271-2-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/smp.h | 5 +++++ arch/powerpc/kernel/smp.c | 39 ++++++++++++++++++++++++++++------ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 7a13bc20f0a0ce..47081a9e13ca4d 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -121,6 +121,11 @@ static inline struct cpumask *cpu_sibling_mask(int cpu) return per_cpu(cpu_sibling_map, cpu); } +static inline struct cpumask *cpu_core_mask(int cpu) +{ + return per_cpu(cpu_core_map, cpu); +} + static inline struct cpumask *cpu_l2_cache_mask(int cpu) { return per_cpu(cpu_l2_cache_map, cpu); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 63ccc70bdd0d9c..f8576a2f34a08d 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1057,17 +1057,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus) local_memory_node(numa_cpu_lookup_table[cpu])); } #endif - /* - * cpu_core_map is now more updated and exists only since - * its been exported for long. It only will have a snapshot - * of cpu_cpu_mask. 
- */ - cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); } /* Init the cpumasks so the boot CPU is related to itself */ cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid)); + cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); if (has_coregroup_support()) cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid)); @@ -1408,6 +1403,9 @@ static void remove_cpu_from_masks(int cpu) set_cpus_unrelated(cpu, i, cpu_smallcore_mask); } + for_each_cpu(i, cpu_core_mask(cpu)) + set_cpus_unrelated(cpu, i, cpu_core_mask); + if (has_coregroup_support()) { for_each_cpu(i, cpu_coregroup_mask(cpu)) set_cpus_unrelated(cpu, i, cpu_coregroup_mask); @@ -1468,8 +1466,11 @@ static void update_coregroup_mask(int cpu, cpumask_var_t *mask) static void add_cpu_to_masks(int cpu) { + struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; int first_thread = cpu_first_thread_sibling(cpu); + int chip_id = cpu_to_chip_id(cpu); cpumask_var_t mask; + bool ret; int i; /* @@ -1485,12 +1486,36 @@ static void add_cpu_to_masks(int cpu) add_cpu_to_smallcore_masks(cpu); /* In CPU-hotplug path, hence use GFP_ATOMIC */ - alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu)); + ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu)); update_mask_by_l2(cpu, &mask); if (has_coregroup_support()) update_coregroup_mask(cpu, &mask); + if (chip_id == -1 || !ret) { + cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); + goto out; + } + + if (shared_caches) + submask_fn = cpu_l2_cache_mask; + + /* Update core_mask with all the CPUs that are part of submask */ + or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask); + + /* Skip all CPUs already part of current CPU core mask */ + cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu)); + + for_each_cpu(i, mask) { + if (chip_id == cpu_to_chip_id(i)) { + or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask); + cpumask_andnot(mask, mask, submask_fn(i)); + } else { + cpumask_andnot(mask, mask, cpu_core_mask(i)); + } + } + +out: free_cpumask_var(mask); } From 131c82b6a1d261705a6f98368e501d43d994018d Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 15 Apr 2021 17:39:33 +0530 Subject: [PATCH 240/302] Revert "powerpc/topology: Update topology_core_cpumask" Now that cpu_core_mask has been reintroduced, lets revert commit 4bce545903fa ("powerpc/topology: Update topology_core_cpumask") Post this commit, lscpu should reflect topologies as requested by a user when a QEMU instance is launched with NUMA spanning multiple sockets. 
Reported-by: Daniel Henrique Barboza Signed-off-by: Srikar Dronamraju Tested-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210415120934.232271-3-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 3beeb030cd78e9..e4db64c0e1842e 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -126,7 +126,7 @@ static inline int cpu_to_coregroup_id(int cpu) #define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) -#define topology_core_cpumask(cpu) (cpu_cpu_mask(cpu)) +#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) #endif From c1e53367dab15e41814cff4e37df8ec4ac8fb9d7 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 15 Apr 2021 17:39:34 +0530 Subject: [PATCH 241/302] powerpc/smp: Cache CPU to chip lookup On systems with large CPUs per node, even with the filtered matching of related CPUs, there can be large number of calls to cpu_to_chip_id for the same CPU. For example with 4096 vCPU, 1 node QEMU configuration, with 4 threads per core, system could be see upto 1024 calls to cpu_to_chip_id() for the same CPU. On a given system, cpu_to_chip_id() for a given CPU would always return the same. Hence cache the result in a lookup table for use in subsequent calls. Since all CPUs sharing the same core will belong to the same chip, the lookup_table has an entry for one CPU per core. chip_id_lookup_table is not being freed and would be used on subsequent CPU online post CPU offline. Reported-by: Daniel Henrique Barboza Suggested-by: Michael Ellerman Signed-off-by: Srikar Dronamraju Tested-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210415120934.232271-4-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/smp.h | 1 + arch/powerpc/kernel/prom.c | 19 +++++++++++++++---- arch/powerpc/kernel/smp.c | 21 +++++++++++++++++++-- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 47081a9e13ca4d..03b3d010cbab66 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -31,6 +31,7 @@ extern u32 *cpu_to_phys_id; extern bool coregroup_enabled; extern int cpu_to_chip_id(int cpu); +extern int *chip_id_lookup_table; #ifdef CONFIG_SMP diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index a8b2d6bfc1ca7e..fbe9deebc8e13e 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -65,6 +65,8 @@ #define DBG(fmt...) 
#endif +int *chip_id_lookup_table; + #ifdef CONFIG_PPC64 int __initdata iommu_is_off; int __initdata iommu_force_on; @@ -914,13 +916,22 @@ EXPORT_SYMBOL(of_get_ibm_chip_id); int cpu_to_chip_id(int cpu) { struct device_node *np; + int ret = -1, idx; + + idx = cpu / threads_per_core; + if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1) + return chip_id_lookup_table[idx]; np = of_get_cpu_node(cpu, NULL); - if (!np) - return -1; + if (np) { + ret = of_get_ibm_chip_id(np); + of_node_put(np); + + if (chip_id_lookup_table) + chip_id_lookup_table[idx] = ret; + } - of_node_put(np); - return of_get_ibm_chip_id(np); + return ret; } EXPORT_SYMBOL(cpu_to_chip_id); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index f8576a2f34a08d..ad3e974726797e 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1073,6 +1073,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus) cpu_smallcore_mask(boot_cpuid)); } + if (cpu_to_chip_id(boot_cpuid) != -1) { + int idx = num_possible_cpus() / threads_per_core; + + /* + * All threads of a core will all belong to the same core, + * chip_id_lookup_table will have one entry per core. + * Assumption: if boot_cpuid doesn't have a chip-id, then no + * other CPUs, will also not have chip-id. + */ + chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL); + if (chip_id_lookup_table) + memset(chip_id_lookup_table, -1, sizeof(int) * idx); + } + if (smp_ops && smp_ops->probe) smp_ops->probe(); } @@ -1468,8 +1482,8 @@ static void add_cpu_to_masks(int cpu) { struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; int first_thread = cpu_first_thread_sibling(cpu); - int chip_id = cpu_to_chip_id(cpu); cpumask_var_t mask; + int chip_id = -1; bool ret; int i; @@ -1492,7 +1506,10 @@ static void add_cpu_to_masks(int cpu) if (has_coregroup_support()) update_coregroup_mask(cpu, &mask); - if (chip_id == -1 || !ret) { + if (chip_id_lookup_table && ret) + chip_id = cpu_to_chip_id(cpu); + + if (chip_id == -1) { cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); goto out; } From 7de21e679e6a789f3729e8402bc440b623a28eae Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Thu, 17 Sep 2020 06:54:37 -0700 Subject: [PATCH 242/302] powerpc: fix EDEADLOCK redefinition error in uapi/asm/errno.h A few archs like powerpc have different errno.h values for macros EDEADLOCK and EDEADLK. In code including both libc and linux versions of errno.h, this can result in multiple definitions of EDEADLOCK in the include chain. Definitions to the same value (e.g. seen with mips) do not raise warnings, but on powerpc there are redefinitions changing the value, which raise warnings and errors (if using "-Werror"). 
Guard against these redefinitions to avoid build errors like the following, first seen cross-compiling libbpf v5.8.9 for powerpc using GCC 8.4.0 with musl 1.1.24: In file included from ../../arch/powerpc/include/uapi/asm/errno.h:5, from ../../include/linux/err.h:8, from libbpf.c:29: ../../include/uapi/asm-generic/errno.h:40: error: "EDEADLOCK" redefined [-Werror] #define EDEADLOCK EDEADLK In file included from toolchain-powerpc_8540_gcc-8.4.0_musl/include/errno.h:10, from libbpf.c:26: toolchain-powerpc_8540_gcc-8.4.0_musl/include/bits/errno.h:58: note: this is the location of the previous definition #define EDEADLOCK 58 cc1: all warnings being treated as errors Cc: Stable Reported-by: Rosen Penev Signed-off-by: Tony Ambardar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200917135437.1238787-1-Tony.Ambardar@gmail.com --- arch/powerpc/include/uapi/asm/errno.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/uapi/asm/errno.h b/arch/powerpc/include/uapi/asm/errno.h index cc79856896a192..4ba87de32be002 100644 --- a/arch/powerpc/include/uapi/asm/errno.h +++ b/arch/powerpc/include/uapi/asm/errno.h @@ -2,6 +2,7 @@ #ifndef _ASM_POWERPC_ERRNO_H #define _ASM_POWERPC_ERRNO_H +#undef EDEADLOCK #include <asm-generic/errno.h> #undef EDEADLOCK From 7153d4bf0b373428d0393c001019da4d0483fddb Mon Sep 17 00:00:00 2001 From: Xiongwei Song Date: Wed, 14 Apr 2021 19:00:33 +0800 Subject: [PATCH 243/302] powerpc/traps: Enhance readability for trap types Define macros to list the ppc interrupt types in interrupt.h, and replace the references to the trap hex values with these macros. The hex numbers referred to are in arch/powerpc/kernel/exceptions-64e.S, arch/powerpc/kernel/exceptions-64s.S, arch/powerpc/kernel/head_*.S, arch/powerpc/kernel/head_booke.h and arch/powerpc/include/asm/kvm_asm.h.
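One representative hunk from the diff below (arch/powerpc/mm/fault.c) shows the readability gain:

    -	int is_exec = TRAP(regs) == 0x400;
    +	int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;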
Signed-off-by: Xiongwei Song [mpe: Resolve conflicts in nmi_disables_ftrace(), fix 40x build] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1618398033-13025-1-git-send-email-sxwjean@me.com --- arch/powerpc/include/asm/interrupt.h | 52 ++++++++++++++++++++++++--- arch/powerpc/kernel/fadump.c | 2 +- arch/powerpc/kernel/interrupt.c | 2 +- arch/powerpc/kernel/process.c | 4 ++- arch/powerpc/kernel/traps.c | 6 ++-- arch/powerpc/kexec/crash.c | 3 +- arch/powerpc/mm/book3s64/hash_utils.c | 4 +-- arch/powerpc/mm/fault.c | 16 ++++----- arch/powerpc/perf/core-book3s.c | 5 +-- arch/powerpc/xmon/xmon.c | 20 +++++++---- 10 files changed, 84 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index b1b9919e04890d..ed2c4042c3d18c 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -9,6 +9,50 @@ #include #include +/* BookE/4xx */ +#define INTERRUPT_CRITICAL_INPUT 0x100 + +/* BookE */ +#define INTERRUPT_DEBUG 0xd00 +#ifdef CONFIG_BOOKE +#define INTERRUPT_PERFMON 0x260 +#define INTERRUPT_DOORBELL 0x280 +#endif + +/* BookS/4xx/8xx */ +#define INTERRUPT_MACHINE_CHECK 0x200 + +/* BookS/8xx */ +#define INTERRUPT_SYSTEM_RESET 0x100 + +/* BookS */ +#define INTERRUPT_DATA_SEGMENT 0x380 +#define INTERRUPT_INST_SEGMENT 0x480 +#define INTERRUPT_TRACE 0xd00 +#define INTERRUPT_H_DATA_STORAGE 0xe00 +#define INTERRUPT_H_FAC_UNAVAIL 0xf80 +#ifdef CONFIG_PPC_BOOK3S +#define INTERRUPT_DOORBELL 0xa00 +#define INTERRUPT_PERFMON 0xf00 +#endif + +/* BookE/BookS/4xx/8xx */ +#define INTERRUPT_DATA_STORAGE 0x300 +#define INTERRUPT_INST_STORAGE 0x400 +#define INTERRUPT_ALIGNMENT 0x600 +#define INTERRUPT_PROGRAM 0x700 +#define INTERRUPT_SYSCALL 0xc00 + +/* BookE/BookS/44x */ +#define INTERRUPT_FP_UNAVAIL 0x800 + +/* BookE/BookS/44x/8xx */ +#define INTERRUPT_DECREMENTER 0x900 + +#ifndef INTERRUPT_PERFMON +#define INTERRUPT_PERFMON 0x0 +#endif + static inline void nap_adjust_return(struct pt_regs *regs) { #ifdef CONFIG_PPC_970_NAP @@ -65,7 +109,7 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. */ - if (TRAP(regs) != 0x700) + if (TRAP(regs) != INTERRUPT_PROGRAM) CT_WARN_ON(ct_state() != CONTEXT_KERNEL); } #endif @@ -131,13 +175,13 @@ static inline bool nmi_disables_ftrace(struct pt_regs *regs) { /* Allow DEC and PMI to be traced when they are soft-NMI */ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { - if (TRAP(regs) == 0x900) + if (TRAP(regs) == INTERRUPT_DECREMENTER) return false; - if (TRAP(regs) == 0xf00) + if (TRAP(regs) == INTERRUPT_PERFMON) return false; } if (IS_ENABLED(CONFIG_PPC_BOOK3E)) { - if (TRAP(regs) == 0x260) + if (TRAP(regs) == INTERRUPT_PERFMON) return false; } diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index eddf362caedce8..b55b4c23f3b601 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -728,7 +728,7 @@ void crash_fadump(struct pt_regs *regs, const char *str) * If we came in via system reset, wait a while for the secondary * CPUs to enter. 
*/ - if (TRAP(&(fdh->regs)) == 0x100) { + if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) { msecs = CRASH_TIMEOUT; while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0)) mdelay(1); diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index b953bb5027e6af..e4559f8914eb78 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -447,7 +447,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. */ - if (TRAP(regs) != 0x700) + if (TRAP(regs) != INTERRUPT_PROGRAM) CT_WARN_ON(ct_state() == CONTEXT_USER); kuap = kuap_get_and_assert_locked(); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5269a0d737ed37..89e34aa273e21a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1467,7 +1467,9 @@ static void __show_regs(struct pt_regs *regs) trap = TRAP(regs); if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR)) pr_cont("CFAR: "REG" ", regs->orig_gpr3); - if (trap == 0x200 || trap == 0x300 || trap == 0x600) { + if (trap == INTERRUPT_MACHINE_CHECK || + trap == INTERRUPT_DATA_STORAGE || + trap == INTERRUPT_ALIGNMENT) { if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); else diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 2babed7a6a2999..b4ab95c9e94a89 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -221,7 +221,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, /* * system_reset_excption handles debugger, crash dump, panic, for 0x100 */ - if (TRAP(regs) == 0x100) + if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) return; crash_fadump(regs, "die oops"); @@ -289,7 +289,7 @@ void die(const char *str, struct pt_regs *regs, long err) /* * system_reset_excption handles debugger, crash dump, panic, for 0x100 */ - if (TRAP(regs) != 0x100) { + if (TRAP(regs) != INTERRUPT_SYSTEM_RESET) { if (debugger(regs)) return; } @@ -1691,7 +1691,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception) u8 status; bool hv; - hv = (TRAP(regs) == 0xf80); + hv = (TRAP(regs) == INTERRUPT_H_FAC_UNAVAIL); if (hv) value = mfspr(SPRN_HFSCR); else diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index c9a889880214ee..0196d0c211aca7 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * The primary CPU waits a while for all secondary CPUs to enter. This is to @@ -336,7 +337,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * If we came in via system reset, wait a while for the secondary * CPUs to enter. 
*/ - if (TRAP(regs) == 0x100) + if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) mdelay(PRIMARY_TIMEOUT); crash_kexec_prepare_cpus(crashing_cpu); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index c1dace327e3961..96d9aa1640073c 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1156,7 +1156,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) /* page is dirty */ if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) { - if (trap == 0x400) { + if (trap == INTERRUPT_INST_STORAGE) { flush_dcache_icache_page(page); set_bit(PG_dcache_clean, &page->flags); } else @@ -1556,7 +1556,7 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) if (user_mode(regs) || (region_id == USER_REGION_ID)) access &= ~_PAGE_PRIVILEGED; - if (TRAP(regs) == 0x400) + if (TRAP(regs) == INTERRUPT_INST_STORAGE) access |= _PAGE_EXEC; err = hash_page_mm(mm, ea, access, TRAP(regs), flags); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 5227def84b5e03..34f641d4a2fe90 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -197,7 +197,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address, bool is_write) { - int is_exec = TRAP(regs) == 0x400; + int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE; /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */ if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT | @@ -391,7 +391,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, struct vm_area_struct * vma; struct mm_struct *mm = current->mm; unsigned int flags = FAULT_FLAG_DEFAULT; - int is_exec = TRAP(regs) == 0x400; + int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE; int is_user = user_mode(regs); int is_write = page_fault_is_write(error_code); vm_fault_t fault, major = 0; @@ -574,20 +574,20 @@ static void __bad_page_fault(struct pt_regs *regs, int sig) /* kernel has accessed a bad area */ switch (TRAP(regs)) { - case 0x300: - case 0x380: - case 0xe00: + case INTERRUPT_DATA_STORAGE: + case INTERRUPT_DATA_SEGMENT: + case INTERRUPT_H_DATA_STORAGE: pr_alert("BUG: %s on %s at 0x%08lx\n", regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : "Unable to handle kernel data access", is_write ? "write" : "read", regs->dar); break; - case 0x400: - case 0x480: + case INTERRUPT_INST_STORAGE: + case INTERRUPT_INST_SEGMENT: pr_alert("BUG: Unable to handle kernel instruction fetch%s", regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n"); break; - case 0x600: + case INTERRUPT_ALIGNMENT: pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n", regs->dar); break; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b17358e8dc12c1..3f223b0bf5cb96 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_PPC64 #include "internal.h" @@ -168,7 +169,7 @@ static bool regs_use_siar(struct pt_regs *regs) * they have not been setup using perf_read_regs() and so regs->result * is something random. 
*/ - return ((TRAP(regs) == 0xf00) && regs->result); + return ((TRAP(regs) == INTERRUPT_PERFMON) && regs->result); } /* @@ -347,7 +348,7 @@ static inline void perf_read_regs(struct pt_regs *regs) * hypervisor samples as well as samples in the kernel with * interrupts off hence the userspace check. */ - if (TRAP(regs) != 0xf00) + if (TRAP(regs) != INTERRUPT_PERFMON) use_siar = 0; else if ((ppmu->flags & PPMU_NO_SIAR)) use_siar = 0; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 361534f67082cc..a619b9ed8458af 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -54,6 +54,7 @@ #include #include #include +#include #ifdef CONFIG_PPC64 #include @@ -605,7 +606,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) * debugger break (IPI). This is similar to * crash_kexec_secondary(). */ - if (TRAP(regs) != 0x100 || !wait_for_other_cpus(ncpus)) + if (TRAP(regs) != INTERRUPT_SYSTEM_RESET || !wait_for_other_cpus(ncpus)) smp_send_debugger_break(); wait_for_other_cpus(ncpus); @@ -615,7 +616,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) if (!locked_down) { /* for breakpoint or single step, print curr insn */ - if (bp || TRAP(regs) == 0xd00) + if (bp || TRAP(regs) == INTERRUPT_TRACE) ppc_inst_dump(regs->nip, 1, 0); printf("enter ? for help\n"); } @@ -684,7 +685,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) disable_surveillance(); if (!locked_down) { /* for breakpoint or single step, print current insn */ - if (bp || TRAP(regs) == 0xd00) + if (bp || TRAP(regs) == INTERRUPT_TRACE) ppc_inst_dump(regs->nip, 1, 0); printf("enter ? for help\n"); } @@ -1769,9 +1770,12 @@ static void excprint(struct pt_regs *fp) printf(" sp: %lx\n", fp->gpr[1]); printf(" msr: %lx\n", fp->msr); - if (trap == 0x300 || trap == 0x380 || trap == 0x600 || trap == 0x200) { + if (trap == INTERRUPT_DATA_STORAGE || + trap == INTERRUPT_DATA_SEGMENT || + trap == INTERRUPT_ALIGNMENT || + trap == INTERRUPT_MACHINE_CHECK) { printf(" dar: %lx\n", fp->dar); - if (trap != 0x380) + if (trap != INTERRUPT_DATA_SEGMENT) printf(" dsisr: %lx\n", fp->dsisr); } @@ -1785,7 +1789,7 @@ static void excprint(struct pt_regs *fp) current->pid, current->comm); } - if (trap == 0x700) + if (trap == INTERRUPT_PROGRAM) print_bug_trap(fp); printf(linux_banner); @@ -1837,7 +1841,9 @@ static void prregs(struct pt_regs *fp) printf("ctr = "REG" xer = "REG" trap = %4lx\n", fp->ctr, fp->xer, fp->trap); trap = TRAP(fp); - if (trap == 0x300 || trap == 0x380 || trap == 0x600) + if (trap == INTERRUPT_DATA_STORAGE || + trap == INTERRUPT_DATA_SEGMENT || + trap == INTERRUPT_ALIGNMENT) printf("dar = "REG" dsisr = %.8lx\n", fp->dar, fp->dsisr); } From 6980d13f0dd189846887bbbfa43793d9a41768d3 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 1 Apr 2021 21:12:00 +0530 Subject: [PATCH 244/302] powerpc/smp: Set numa node before updating mask Geethika reported a trace when doing a dlpar CPU add. 
------------[ cut here ]------------ WARNING: CPU: 152 PID: 1134 at kernel/sched/topology.c:2057 CPU: 152 PID: 1134 Comm: kworker/152:1 Not tainted 5.12.0-rc5-master #5 Workqueue: events cpuset_hotplug_workfn NIP: c0000000001cfc14 LR: c0000000001cfc10 CTR: c0000000007e3420 REGS: c0000034a08eb260 TRAP: 0700 Not tainted (5.12.0-rc5-master+) MSR: 8000000000029033 CR: 28828422 XER: 00000020 CFAR: c0000000001fd888 IRQMASK: 0 #012GPR00: c0000000001cfc10 c0000034a08eb500 c000000001f35400 0000000000000027 #012GPR04: c0000035abaa8010 c0000035abb30a00 0000000000000027 c0000035abaa8018 #012GPR08: 0000000000000023 c0000035abaaef48 00000035aa540000 c0000035a49dffe8 #012GPR12: 0000000028828424 c0000035bf1a1c80 0000000000000497 0000000000000004 #012GPR16: c00000000347a258 0000000000000140 c00000000203d468 c000000001a1a490 #012GPR20: c000000001f9c160 c0000034adf70920 c0000034aec9fd20 0000000100087bd3 #012GPR24: 0000000100087bd3 c0000035b3de09f8 0000000000000030 c0000035b3de09f8 #012GPR28: 0000000000000028 c00000000347a280 c0000034aefe0b00 c0000000010a2a68 NIP [c0000000001cfc14] build_sched_domains+0x6a4/0x1500 LR [c0000000001cfc10] build_sched_domains+0x6a0/0x1500 Call Trace: [c0000034a08eb500] [c0000000001cfc10] build_sched_domains+0x6a0/0x1500 (unreliable) [c0000034a08eb640] [c0000000001d1e6c] partition_sched_domains_locked+0x3ec/0x530 [c0000034a08eb6e0] [c0000000002936d4] rebuild_sched_domains_locked+0x524/0xbf0 [c0000034a08eb7e0] [c000000000296bb0] rebuild_sched_domains+0x40/0x70 [c0000034a08eb810] [c000000000296e74] cpuset_hotplug_workfn+0x294/0xe20 [c0000034a08ebc30] [c000000000178dd0] process_one_work+0x300/0x670 [c0000034a08ebd10] [c0000000001791b8] worker_thread+0x78/0x520 [c0000034a08ebda0] [c000000000185090] kthread+0x1a0/0x1b0 [c0000034a08ebe10] [c00000000000ccec] ret_from_kernel_thread+0x5c/0x70 Instruction dump: 7d2903a6 4e800421 e8410018 7f67db78 7fe6fb78 7f45d378 7f84e378 7c681b78 3c62ff1a 3863c6f8 4802dc35 60000000 <0fe00000> 3920fff4 f9210070 e86100a0 ---[ end trace 532d9066d3d4d7ec ]--- Some of the per-CPU masks use cpu_cpu_mask as a filter to limit the search for related CPUs. On a dlpar add of a CPU, update cpu_cpu_mask before updating the per-CPU masks. This will ensure the cpu_cpu_mask is updated correctly before its used in setting the masks. Setting the numa_node will ensure that when cpu_cpu_mask() gets called, the correct node number is used. This code movement helped fix the above call trace. 
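Restating the start_secondary() hunk from the diff below, the fix is simply to set the NUMA node before building the masks that consult it:

    	set_numa_node(numa_cpu_lookup_table[cpu]);
    	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));

    	/* Update topology CPU masks */
    	add_cpu_to_masks(cpu);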
Reported-by: Geetika Moolchandani Signed-off-by: Srikar Dronamraju Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210401154200.150077-1-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ad3e974726797e..2e05c783440a33 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1563,6 +1563,9 @@ void start_secondary(void *unused) vdso_getcpu_init(); #endif + set_numa_node(numa_cpu_lookup_table[cpu]); + set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); + /* Update topology CPU masks */ add_cpu_to_masks(cpu); @@ -1581,9 +1584,6 @@ void start_secondary(void *unused) shared_caches = true; } - set_numa_node(numa_cpu_lookup_table[cpu]); - set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); - smp_wmb(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); From 49c1d07fd04f54eb588c4a1dfcedc8d22c5ffd50 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 2 Apr 2021 12:41:24 +1000 Subject: [PATCH 245/302] powerpc/powernv: Enable HAIL (HV AIL) for ISA v3.1 processors Starting with ISA v3.1, LPCR[AIL] no longer controls the interrupt mode for HV=1 interrupts. Instead, a new LPCR[HAIL] bit is defined which behaves like AIL=3 for HV interrupts when set. Set HAIL on bare metal to give us mmu-on interrupts and improve performance. This also fixes an scv bug: we don't implement scv real mode (AIL=0) vectors because they are at an inconvenient location, so we just disable scv support when AIL can not be set. However powernv assumes that LPCR[AIL] will enable AIL mode so it enables scv support despite HV interrupts being AIL=0, which causes scv interrupts to go off into the weeds. Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions") Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210402024124.545826-1-npiggin@gmail.com --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kernel/setup_64.c | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1be20bc8dce2f5..9086a2644c89b9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -441,6 +441,7 @@ #define LPCR_VRMA_LP1 ASM_CONST(0x0000800000000000) #define LPCR_RMLS 0x1C000000 /* Implementation dependent RMO limit sel */ #define LPCR_RMLS_SH 26 +#define LPCR_HAIL ASM_CONST(0x0000000004000000) /* HV AIL (ISAv3.1) */ #define LPCR_ILE ASM_CONST(0x0000000002000000) /* !HV irqs set MSR:LE */ #define LPCR_AIL ASM_CONST(0x0000000001800000) /* Alternate interrupt location */ #define LPCR_AIL_0 ASM_CONST(0x0000000000000000) /* MMU off exception offset 0x0 */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index ccbfcc88758ca6..b779d25761cf98 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -232,10 +232,23 @@ static void cpu_ready_for_interrupts(void) * If we are not in hypervisor mode the job is done once for * the whole partition in configure_exceptions(). 
*/ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) { + if (cpu_has_feature(CPU_FTR_HVMODE)) { unsigned long lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + unsigned long new_lpcr = lpcr; + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + /* P10 DD1 does not have HAIL */ + if (pvr_version_is(PVR_POWER10) && + (mfspr(SPRN_PVR) & 0xf00) == 0x100) + new_lpcr |= LPCR_AIL_3; + else + new_lpcr |= LPCR_HAIL; + } else if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + new_lpcr |= LPCR_AIL_3; + } + + if (new_lpcr != lpcr) + mtspr(SPRN_LPCR, new_lpcr); } /* From d8a1d6c58986d8778768b15dc5bac0b4b082d345 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 8 Apr 2021 13:15:04 +0530 Subject: [PATCH 246/302] powerpc/perf: Add platform specific check_attr_config Add platform specific attr.config value checks. Patch includes checks for both power9 and power10. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210408074504.248211-2-maddy@linux.ibm.com --- arch/powerpc/perf/isa207-common.c | 42 +++++++++++++++++++++++++++++++ arch/powerpc/perf/isa207-common.h | 2 ++ arch/powerpc/perf/power10-pmu.c | 13 ++++++++++ arch/powerpc/perf/power9-pmu.c | 13 ++++++++++ 4 files changed, 70 insertions(+) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 48b2d9a5096ca2..bf9094d8205fbd 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -694,3 +694,45 @@ int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, return num_alt; } + +int isa3XX_check_attr_config(struct perf_event *ev) +{ + u64 val, sample_mode; + u64 event = ev->attr.config; + + val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; + sample_mode = val & 0x3; + + /* + * MMCRA[61:62] is Random Sampling Mode (SM). + * value of 0b11 is reserved. + */ + if (sample_mode == 0x3) + return -EINVAL; + + /* + * Check for all reserved value + * Source: Performance Monitoring Unit User Guide + */ + switch (val) { + case 0x5: + case 0x9: + case 0xD: + case 0x19: + case 0x1D: + case 0x1A: + case 0x1E: + return -EINVAL; + } + + /* + * MMCRA[48:51]/[52:55]) Threshold Start/Stop + * Events Selection. + * 0b11110000/0b00001111 is reserved. 
+ */ + val = (event >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK; + if (((val & 0xF0) == 0xF0) || ((val & 0xF) == 0xF)) + return -EINVAL; + + return 0; +} diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 1af0e8c97ac7bf..b4d2a2b2b346b3 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -280,4 +280,6 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, struct pt_regs *regs); void isa207_get_mem_weight(u64 *weight); +int isa3XX_check_attr_config(struct perf_event *ev); + #endif diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index a901c1348cad0d..f9d64c63bb4a72 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -106,6 +106,18 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[]) return num_alt; } +static int power10_check_attr_config(struct perf_event *ev) +{ + u64 val; + u64 event = ev->attr.config; + + val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; + if (val == 0x10 || isa3XX_check_attr_config(ev)) + return -EINVAL; + + return 0; +} + GENERIC_EVENT_ATTR(cpu-cycles, PM_RUN_CYC); GENERIC_EVENT_ATTR(instructions, PM_RUN_INST_CMPL); GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL); @@ -559,6 +571,7 @@ static struct power_pmu power10_pmu = { .attr_groups = power10_pmu_attr_groups, .bhrb_nr = 32, .capabilities = PERF_PMU_CAP_EXTENDED_REGS, + .check_attr_config = power10_check_attr_config, }; int init_power10_pmu(void) diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 2a57e93a79dcf7..ff3382140d7e65 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -151,6 +151,18 @@ static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[]) return num_alt; } +static int power9_check_attr_config(struct perf_event *ev) +{ + u64 val; + u64 event = ev->attr.config; + + val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; + if (val == 0xC || isa3XX_check_attr_config(ev)) + return -EINVAL; + + return 0; +} + GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC); GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL); @@ -437,6 +449,7 @@ static struct power_pmu power9_pmu = { .attr_groups = power9_pmu_attr_groups, .bhrb_nr = 32, .capabilities = PERF_PMU_CAP_EXTENDED_REGS, + .check_attr_config = power9_check_attr_config, }; int init_power9_pmu(void) From cbd3d5ba46b68c033986a6087209930f001cbcca Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 19 Apr 2021 22:24:32 +1000 Subject: [PATCH 247/302] powerpc/fadump: Fix compile error since trap type change sfr reports that the allyesconfig build fails with: arch/powerpc/kernel/fadump.c: In function 'crash_fadump': arch/powerpc/kernel/fadump.c:731:28: error: 'INTERRUPT_SYSTEM_RESET' undeclared 731 | if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) { Add an include of interrupt.h to fix it. 
Fixes: 7153d4bf0b37 ("powerpc/traps: Enhance readability for trap types") Signed-off-by: Stephen Rothwell [mpe: Reformat change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210419191425.281dc58a@canb.auug.org.au --- arch/powerpc/kernel/fadump.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index b55b4c23f3b601..000e3b7f3fca56 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * The CPU who acquired the lock to trigger the fadump crash should From 864ec4d40c83365b16483d88990e7e579537635c Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Fri, 16 Apr 2021 18:27:50 +0530 Subject: [PATCH 248/302] powerpc/pseries/mce: Fix a typo in error type assignment The error type is ICACHE not DCACHE, for case MCE_ERROR_TYPE_ICACHE. Signed-off-by: Ganesh Goudar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210416125750.49550-1-ganeshgr@linux.ibm.com --- arch/powerpc/platforms/pseries/ras.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index f8b390a9d9fb31..9d4ef65da7f395 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -699,7 +699,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.error_type = MCE_ERROR_TYPE_DCACHE; break; case MC_ERROR_TYPE_I_CACHE: - mce_err.error_type = MCE_ERROR_TYPE_DCACHE; + mce_err.error_type = MCE_ERROR_TYPE_ICACHE; break; case MC_ERROR_TYPE_UNKNOWN: default: From 0e3b3ff83ce24a7a01e467ca42e3e33e87195c0d Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Fri, 16 Apr 2021 18:02:15 -0300 Subject: [PATCH 249/302] powerpc/pseries: Introduce dlpar_unisolate_drc() Next patch will execute a set-indicator call in hotplug-cpu.c. Create a dlpar_unisolate_drc() helper to avoid spreading more rtas_set_indicator() calls outside of dlpar.c. 
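For illustration, a caller that wants to flag a failed removal back to the
hypervisor would use the helper roughly like this (this mirrors the
hotplug-cpu.c hunk added by the next patch in the series, and is shown here
only as a usage sketch):

	rc = dlpar_cpu_remove_by_index(drc_index);
	if (rc)
		dlpar_unisolate_drc(drc_index);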
Signed-off-by: Daniel Henrique Barboza Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210416210216.380291-2-danielhb413@gmail.com --- arch/powerpc/platforms/pseries/dlpar.c | 14 ++++++++++++++ arch/powerpc/platforms/pseries/pseries.h | 1 + 2 files changed, 15 insertions(+) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 233503fcf8f08f..3ac70790ec7aa5 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -329,6 +329,20 @@ int dlpar_release_drc(u32 drc_index) return 0; } +int dlpar_unisolate_drc(u32 drc_index) +{ + int dr_status, rc; + + rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, + DR_ENTITY_SENSE, drc_index); + if (rc || dr_status != DR_ENTITY_PRESENT) + return -1; + + rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE); + + return 0; +} + int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) { int rc; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 8925a0fac15f70..1f051a786fb317 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -52,6 +52,7 @@ extern int dlpar_attach_node(struct device_node *, struct device_node *); extern int dlpar_detach_node(struct device_node *); extern int dlpar_acquire_drc(u32 drc_index); extern int dlpar_release_drc(u32 drc_index); +extern int dlpar_unisolate_drc(u32 drc_index); void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog); int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog); From 29c9a2699e71a7866a98ebdf6ea38135d31b4e1f Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Fri, 16 Apr 2021 18:02:16 -0300 Subject: [PATCH 250/302] powerpc/pseries: Set UNISOLATE on dlpar_cpu_remove() failure The RTAS set-indicator call, when attempting to UNISOLATE a DRC that is already UNISOLATED or CONFIGURED, returns RTAS_OK and does nothing else for both QEMU and phyp. This gives us an opportunity to use this behavior to signal the hypervisor layer when an error during device removal happens, allowing it to do a proper error handling, while not breaking QEMU/phyp implementations that don't have this support. This patch introduces this idea by unisolating all CPU DRCs that failed to be removed by dlpar_cpu_remove_by_index(), when handling the PSERIES_HP_ELOG_ID_DRC_INDEX event. This is being done for this event only because its the only CPU removal event QEMU uses, and there's no need at this moment to add this mechanism for phyp only code. 
Signed-off-by: Daniel Henrique Barboza Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210416210216.380291-3-danielhb413@gmail.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index ec478f8a98ff26..c230ab550aa960 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -816,8 +816,16 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) case PSERIES_HP_ELOG_ACTION_REMOVE: if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) rc = dlpar_cpu_remove_by_count(count); - else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { rc = dlpar_cpu_remove_by_index(drc_index); + /* + * Setting the isolation state of an UNISOLATED/CONFIGURED + * device to UNISOLATE is a no-op, but the hypervisor can + * use it as a hint that the CPU removal failed. + */ + if (rc) + dlpar_unisolate_drc(drc_index); + } else rc = -EINVAL; break; From 2886e2df10beaf50352dad7a90907251bc692029 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Sun, 18 Apr 2021 12:29:42 -0700 Subject: [PATCH 251/302] Documentation/powerpc: Add proper links for manual and tests The links that are mentioned in this document are no longer valid. So changed the proper links for NXGZIP user manual and test cases. Reported-by: Bulent Abali Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/08511c1e92ac239f20ac88c73c59d1f8cf02e6ad.camel@linux.ibm.com --- Documentation/powerpc/vas-api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/powerpc/vas-api.rst b/Documentation/powerpc/vas-api.rst index 90c50ed839f349..bdb50fed903e03 100644 --- a/Documentation/powerpc/vas-api.rst +++ b/Documentation/powerpc/vas-api.rst @@ -254,7 +254,7 @@ using this window. the signal will be issued to the thread group leader signals. NX-GZIP User's Manual: -https://github.com/libnxz/power-gzip/blob/master/power_nx_gzip_um.pdf +https://github.com/libnxz/power-gzip/blob/master/doc/power_nx_gzip_um.pdf Simple example ============== @@ -301,5 +301,5 @@ Simple example close(fd) or window can be closed upon process exit } - Refer https://github.com/abalib/power-gzip for tests or more + Refer https://github.com/libnxz/power-gzip for tests or more use cases. From af31fd0c9107e400a8eb89d0eafb40bb78802f79 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Mon, 22 Mar 2021 10:57:23 -0400 Subject: [PATCH 252/302] powerpc/perf: Expose processor pipeline stage cycles using PERF_SAMPLE_WEIGHT_STRUCT Performance Monitoring Unit (PMU) registers in powerpc provides information on cycles elapsed between different stages in the pipeline. This can be used for application tuning. On ISA v3.1 platform, this information is exposed by sampling registers. Patch adds kernel support to capture two of the cycle counters as part of perf sample using the sample type: PERF_SAMPLE_WEIGHT_STRUCT. The power PMU function 'get_mem_weight' currently uses 64 bit weight field of perf_sample_data to capture memory latency. But following the introduction of PERF_SAMPLE_WEIGHT_TYPE, weight field could contain 64-bit or 32-bit value depending on the architexture support for PERF_SAMPLE_WEIGHT_STRUCT. Patches uses WEIGHT_STRUCT to expose the pipeline stage cycles info. 
Hence update the ppmu functions to work for 64-bit and 32-bit weight values. If the sample type is PERF_SAMPLE_WEIGHT, use the 64-bit weight field. if the sample type is PERF_SAMPLE_WEIGHT_STRUCT, memory subsystem latency is stored in the low 32bits of perf_sample_weight structure. Also for CPU_FTR_ARCH_31, capture the two cycle counter information in two 16 bit fields of perf_sample_weight structure. Signed-off-by: Athira Rajeev Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1616425047-1666-2-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/include/asm/perf_event_server.h | 2 +- arch/powerpc/perf/core-book3s.c | 4 +-- arch/powerpc/perf/isa207-common.c | 29 ++++++++++++++++++-- arch/powerpc/perf/isa207-common.h | 6 +++- 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index dde97d7d92532e..f4c3428e816bd4 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -43,7 +43,7 @@ struct power_pmu { u64 alt[]); void (*get_mem_data_src)(union perf_mem_data_src *dsrc, u32 flags, struct pt_regs *regs); - void (*get_mem_weight)(u64 *weight); + void (*get_mem_weight)(u64 *weight, u64 type); unsigned long group_constraint_mask; unsigned long group_constraint_val; u64 (*bhrb_filter_map)(u64 branch_sample_type); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3f223b0bf5cb96..16d4d1b6a1ffb5 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2218,9 +2218,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, ppmu->get_mem_data_src) ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs); - if (event->attr.sample_type & PERF_SAMPLE_WEIGHT && + if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE && ppmu->get_mem_weight) - ppmu->get_mem_weight(&data.weight.full); + ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type); if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index bf9094d8205fbd..4e71a76c7734d8 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -284,8 +284,10 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, } } -void isa207_get_mem_weight(u64 *weight) +void isa207_get_mem_weight(u64 *weight, u64 type) { + union perf_sample_weight *weight_fields; + u64 weight_lat; u64 mmcra = mfspr(SPRN_MMCRA); u64 exp = MMCRA_THR_CTR_EXP(mmcra); u64 mantissa = MMCRA_THR_CTR_MANT(mmcra); @@ -296,9 +298,30 @@ void isa207_get_mem_weight(u64 *weight) mantissa = P10_MMCRA_THR_CTR_MANT(mmcra); if (val == 0 || val == 7) - *weight = 0; + weight_lat = 0; else - *weight = mantissa << (2 * exp); + weight_lat = mantissa << (2 * exp); + + /* + * Use 64 bit weight field (full) if sample type is + * WEIGHT. + * + * if sample type is WEIGHT_STRUCT: + * - store memory latency in the lower 32 bits. + * - For ISA v3.1, use remaining two 16 bit fields of + * perf_sample_weight to store cycle counter values + * from sier2. 
+ */ + weight_fields = (union perf_sample_weight *)weight; + if (type & PERF_SAMPLE_WEIGHT) + weight_fields->full = weight_lat; + else { + weight_fields->var1_dw = (u32)weight_lat; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + weight_fields->var2_w = P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2)); + weight_fields->var3_w = P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2)); + } + } } int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1) diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index b4d2a2b2b346b3..ae8d44e325c7e3 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -265,6 +265,10 @@ #define ISA207_SIER_DATA_SRC_SHIFT 53 #define ISA207_SIER_DATA_SRC_MASK (0x7ull << ISA207_SIER_DATA_SRC_SHIFT) +/* Bits in SIER2/SIER3 for Power10 */ +#define P10_SIER2_FINISH_CYC(sier2) (((sier2) >> (63 - 37)) & 0x7fful) +#define P10_SIER2_DISPATCH_CYC(sier2) (((sier2) >> (63 - 13)) & 0x7fful) + #define P(a, b) PERF_MEM_S(a, b) #define PH(a, b) (P(LVL, HIT) | P(a, b)) #define PM(a, b) (P(LVL, MISS) | P(a, b)) @@ -278,7 +282,7 @@ int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, const unsigned int ev_alt[][MAX_ALT]); void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, struct pt_regs *regs); -void isa207_get_mem_weight(u64 *weight); +void isa207_get_mem_weight(u64 *weight, u64 type); int isa3XX_check_attr_config(struct perf_event *ev); From 39d0099f94390eb7a677e1a5c9bb56a4daa242a1 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Thu, 1 Apr 2021 18:13:25 -0600 Subject: [PATCH 253/302] powerpc/pseries: Add shutdown() to vio_driver and vio_bus Currently, neither the vio_bus or vio_driver structures provide support for a shutdown() routine. Add support for shutdown() by allowing drivers to provide a implementation via function pointer in their vio_driver struct and provide a proper implementation in the driver template for the vio_bus that calls a vio drivers shutdown() if defined. In the case that no shutdown() is defined by a vio driver and a kexec is in progress we implement a big hammer that calls remove() to ensure no further DMA for the devices is possible. Signed-off-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210402001325.939668-1-tyreld@linux.ibm.com --- arch/powerpc/include/asm/vio.h | 1 + arch/powerpc/platforms/pseries/vio.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h index 721c0d6715ac8f..e7479a4abf969e 100644 --- a/arch/powerpc/include/asm/vio.h +++ b/arch/powerpc/include/asm/vio.h @@ -114,6 +114,7 @@ struct vio_driver { const struct vio_device_id *id_table; int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); void (*remove)(struct vio_dev *dev); + void (*shutdown)(struct vio_dev *dev); /* A driver must have a get_desired_dma() function to * be loaded in a CMO environment if it uses DMA. 
*/ diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 429053d0402ad1..e00f3725ec9606 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -1278,6 +1279,20 @@ static int vio_bus_remove(struct device *dev) return 0; } +static void vio_bus_shutdown(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv; + + if (dev->driver) { + viodrv = to_vio_driver(dev->driver); + if (viodrv->shutdown) + viodrv->shutdown(viodev); + else if (kexec_in_progress) + vio_bus_remove(dev); + } +} + /** * vio_register_driver: - Register a new vio driver * @viodrv: The vio_driver structure to be registered. @@ -1617,6 +1632,7 @@ struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, + .shutdown = vio_bus_shutdown, }; /** From 3027a37c06be364e6443d3df3adf45576fba50cb Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 18 Apr 2021 23:16:41 +1000 Subject: [PATCH 254/302] powerpc: Only define _TASK_CPU for 32-bit We have some interesting code in our Makefile to define _TASK_CPU, based on awk'ing the value out of asm-offsets.h. It exists to circumvent some circular header dependencies that prevent us from referring to task_struct in the relevant code. See the comment around _TASK_CPU in smp.h for more detail. Maybe one day we can come up with a better solution, but for now we can at least limit that logic to 32-bit, because it's not needed for 64-bit. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210418131641.1186227-1-mpe@ellerman.id.au --- arch/powerpc/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 32dd693b4e4205..3212d076ac6a43 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -438,12 +438,15 @@ endif endif ifdef CONFIG_SMP +ifdef CONFIG_PPC32 prepare: task_cpu_prepare PHONY += task_cpu_prepare task_cpu_prepare: prepare0 $(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == "TASK_CPU") print $$3;}' include/generated/asm-offsets.h)) -endif + +endif # CONFIG_PPC32 +endif # CONFIG_SMP PHONY += checkbin # Check toolchain versions: From ed8029d7b472369a010a1901358567ca3b6dbb0d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 18 Apr 2021 23:54:13 +1000 Subject: [PATCH 255/302] powerpc/pseries: Stop calling printk in rtas_stop_self() RCU complains about us calling printk() from an offline CPU: ============================= WARNING: suspicious RCU usage 5.12.0-rc7-02874-g7cf90e481cb8 #1 Not tainted ----------------------------- kernel/locking/lockdep.c:3568 RCU-list traversed in non-reader section!! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 2, debug_locks = 1 no locks held by swapper/0/0. 
stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc7-02874-g7cf90e481cb8 #1 Call Trace: dump_stack+0xec/0x144 (unreliable) lockdep_rcu_suspicious+0x124/0x144 __lock_acquire+0x1098/0x28b0 lock_acquire+0x128/0x600 _raw_spin_lock_irqsave+0x6c/0xc0 down_trylock+0x2c/0x70 __down_trylock_console_sem+0x60/0x140 vprintk_emit+0x1a8/0x4b0 vprintk_func+0xcc/0x200 printk+0x40/0x54 pseries_cpu_offline_self+0xc0/0x120 arch_cpu_idle_dead+0x54/0x70 do_idle+0x174/0x4a0 cpu_startup_entry+0x38/0x40 rest_init+0x268/0x388 start_kernel+0x748/0x790 start_here_common+0x1c/0x614 Which happens because by the time we get to rtas_stop_self() we are already offline. In addition the message can be spammy, and is not that helpful for users, so remove it. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210418135413.1204031-1-mpe@ellerman.id.au --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index c230ab550aa960..7e970f81d8ff59 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -47,9 +47,6 @@ static void rtas_stop_self(void) BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE); - printk("cpu %u (hwid %u) Ready to die...\n", - smp_processor_id(), hard_smp_processor_id()); - rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL); panic("Alas, I survived.\n"); From e4e8bc1df691ba5ba749d1e2b67acf9827e51a35 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 19 Apr 2021 22:01:39 +1000 Subject: [PATCH 256/302] powerpc/kvm: Fix PR KVM with KUAP/MEM_KEYS enabled The changes to add KUAP support with the hash MMU broke booting of KVM PR guests. The symptom is no visible progress of the guest, or possibly just "SLOF" being printed to the qemu console. Host code is still executing, but breaking into xmon might show a stack trace such as: __might_fault+0x84/0xe0 (unreliable) kvm_read_guest+0x1c8/0x2f0 [kvm] kvmppc_ld+0x1b8/0x2d0 [kvm] kvmppc_load_last_inst+0x50/0xa0 [kvm] kvmppc_exit_pr_progint+0x178/0x220 [kvm_pr] kvmppc_handle_exit_pr+0x31c/0xe30 [kvm_pr] after_sprg3_load+0x80/0x90 [kvm_pr] kvmppc_vcpu_run_pr+0x104/0x260 [kvm_pr] kvmppc_vcpu_run+0x34/0x48 [kvm] kvm_arch_vcpu_ioctl_run+0x340/0x450 [kvm] kvm_vcpu_ioctl+0x2ac/0x8c0 [kvm] sys_ioctl+0x320/0x1060 system_call_exception+0x160/0x270 system_call_common+0xf0/0x27c Bisect points to commit b2ff33a10c8b ("powerpc/book3s64/hash/kuap: Enable kuap on hash"), but that's just the commit that enabled KUAP with hash and made the bug visible. The root cause seems to be that KVM PR is creating kernel mappings that don't use the correct key, since we switched to using key 3. We have a helper for adding the right key value, however it's designed to take a pteflags variable, which the KVM code doesn't have. But we can make it work by passing 0 for the pteflags, and tell it explicitly that it should use the kernel key. With that changed guests boot successfully. 
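Concretely, the whole fix is the single line below, added in
kvmppc_mmu_map_page() before the WIMG bits are merged into rflags (taken
from the diff that follows, shown here for clarity; pteflags is passed as 0
and the helper is told to use the kernel key, i.e. key 3 with hash KUAP):

	rflags |= pte_to_hpte_pkey_bits(0, HPTE_USE_KERNEL_KEY);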
Fixes: d94b827e89dc ("powerpc/book3s64/kuap: Use Key 3 for kernel mapping with hash translation") Cc: stable@vger.kernel.org # v5.11+ Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210419120139.1455937-1-mpe@ellerman.id.au --- arch/powerpc/kvm/book3s_64_mmu_host.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index e452158a18d771..5ac66be1cb3c7b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -133,6 +134,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, else kvmppc_mmu_flush_icache(pfn); + rflags |= pte_to_hpte_pkey_bits(0, HPTE_USE_KERNEL_KEY); rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg; /* From a9d2f9bb225fd2a764aef57738ab6c7f38d782ae Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 20 Apr 2021 01:54:04 -0300 Subject: [PATCH 257/302] powerpc/pseries/iommu: Fix window size for direct mapping with pmem As of today, if the DDW is big enough to fit (1 << MAX_PHYSMEM_BITS) it's possible to use direct DMA mapping even with pmem region. But, if that happens, the window size (len) is set to (MAX_PHYSMEM_BITS - page_shift) instead of MAX_PHYSMEM_BITS, causing a pagesize times smaller DDW to be created, being insufficient for correct usage. Fix this so the correct window size is used in this case. Fixes: bf6e2d562bbc4 ("powerpc/dma: Fallback to dma_ops when persistent memory present") Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210420045404.438735-1-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 67c9953a6503c6..5b3050ff0c55ba 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1252,7 +1252,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (pmem_present) { if (query.largest_available_block >= (1ULL << (MAX_PHYSMEM_BITS - page_shift))) - len = MAX_PHYSMEM_BITS - page_shift; + len = MAX_PHYSMEM_BITS; else dev_info(&dev->dev, "Skipping ibm,pmemory"); } From 0f5eb28a6ce6ab0882010e6727bfd6e8cd569273 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 19 Apr 2021 15:48:09 +0000 Subject: [PATCH 258/302] powerpc/8xx: Enhance readability of trap types This patch makes use of trap types in head_8xx.S Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e1147287bf6f2fb0693048fe8db0298c7870e419.1618847273.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 29 ++++++++++++---- arch/powerpc/kernel/head_8xx.S | 49 ++++++++++++++-------------- 2 files changed, 47 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index ed2c4042c3d18c..cf2c5c3ae71692 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -2,13 +2,6 @@ #ifndef _ASM_POWERPC_INTERRUPT_H #define _ASM_POWERPC_INTERRUPT_H -#include -#include -#include -#include -#include -#include - /* BookE/4xx */ #define INTERRUPT_CRITICAL_INPUT 0x100 @@ -39,9 +32,11 @@ /* BookE/BookS/4xx/8xx */ #define INTERRUPT_DATA_STORAGE 0x300 #define INTERRUPT_INST_STORAGE 0x400 +#define 
INTERRUPT_EXTERNAL 0x500 #define INTERRUPT_ALIGNMENT 0x600 #define INTERRUPT_PROGRAM 0x700 #define INTERRUPT_SYSCALL 0xc00 +#define INTERRUPT_TRACE 0xd00 /* BookE/BookS/44x */ #define INTERRUPT_FP_UNAVAIL 0x800 @@ -53,6 +48,24 @@ #define INTERRUPT_PERFMON 0x0 #endif +/* 8xx */ +#define INTERRUPT_SOFT_EMU_8xx 0x1000 +#define INTERRUPT_INST_TLB_MISS_8xx 0x1100 +#define INTERRUPT_DATA_TLB_MISS_8xx 0x1200 +#define INTERRUPT_INST_TLB_ERROR_8xx 0x1300 +#define INTERRUPT_DATA_TLB_ERROR_8xx 0x1400 +#define INTERRUPT_DATA_BREAKPOINT_8xx 0x1c00 +#define INTERRUPT_INST_BREAKPOINT_8xx 0x1d00 + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include +#include + static inline void nap_adjust_return(struct pt_regs *regs) { #ifdef CONFIG_PPC_970_NAP @@ -514,4 +527,6 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs) local_irq_enable(); } +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_POWERPC_INTERRUPT_H */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index e3b066703eab28..7d445e4342c0c6 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -29,6 +29,7 @@ #include #include #include +#include /* * Value for the bits that have fixed value in RPN entries. @@ -118,49 +119,49 @@ instruction_counter: #endif /* System reset */ - EXCEPTION(0x100, Reset, system_reset_exception) + EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, system_reset_exception) /* Machine check */ - START_EXCEPTION(0x200, MachineCheck) - EXCEPTION_PROLOG 0x200 MachineCheck handle_dar_dsisr=1 + START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck) + EXCEPTION_PROLOG INTERRUPT_MACHINE_CHECK MachineCheck handle_dar_dsisr=1 prepare_transfer_to_handler bl machine_check_exception b interrupt_return /* External interrupt */ - EXCEPTION(0x500, HardwareInterrupt, do_IRQ) + EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ) /* Alignment exception */ - START_EXCEPTION(0x600, Alignment) - EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 + START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment) + EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1 prepare_transfer_to_handler bl alignment_exception REST_NVGPRS(r1) b interrupt_return /* Program check exception */ - START_EXCEPTION(0x700, ProgramCheck) - EXCEPTION_PROLOG 0x700 ProgramCheck + START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck) + EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck prepare_transfer_to_handler bl program_check_exception REST_NVGPRS(r1) b interrupt_return /* Decrementer */ - EXCEPTION(0x900, Decrementer, timer_interrupt) + EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt) /* System call */ - START_EXCEPTION(0xc00, SystemCall) - SYSCALL_ENTRY 0xc00 + START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall) + SYSCALL_ENTRY INTERRUPT_SYSCALL /* Single step - not used on 601 */ - EXCEPTION(0xd00, SingleStep, single_step_exception) + EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception) /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. 
*/ - START_EXCEPTION(0x1000, SoftEmu) - EXCEPTION_PROLOG 0x1000 SoftEmu + START_EXCEPTION(INTERRUPT_SOFT_EMU_8xx, SoftEmu) + EXCEPTION_PROLOG INTERRUPT_SOFT_EMU_8xx SoftEmu prepare_transfer_to_handler bl emulation_assist_interrupt REST_NVGPRS(r1) @@ -187,7 +188,7 @@ instruction_counter: #define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) #endif - START_EXCEPTION(0x1100, InstructionTLBMiss) + START_EXCEPTION(INTERRUPT_INST_TLB_MISS_8xx, InstructionTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 @@ -243,7 +244,7 @@ instruction_counter: rfi #endif - START_EXCEPTION(0x1200, DataStoreTLBMiss) + START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 mfcr r11 @@ -306,9 +307,9 @@ instruction_counter: * to many reasons, such as executing guarded memory or illegal instruction * addresses. There is nothing to do but handle a big time error fault. */ - START_EXCEPTION(0x1300, InstructionTLBError) + START_EXCEPTION(INTERRUPT_INST_TLB_ERROR_8xx, InstructionTLBError) /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ - EXCEPTION_PROLOG 0x400 InstructionTLBError + EXCEPTION_PROLOG INTERRUPT_INST_STORAGE InstructionTLBError andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ andis. r10,r9,SRR1_ISI_NOPT@h beq+ .Litlbie @@ -324,7 +325,7 @@ instruction_counter: * many reasons, including a dirty update to a pte. We bail out to * a higher level function that can handle it. */ - START_EXCEPTION(0x1400, DataTLBError) + START_EXCEPTION(INTERRUPT_DATA_TLB_ERROR_8xx, DataTLBError) EXCEPTION_PROLOG_0 handle_dar_dsisr=1 mfspr r11, SPRN_DAR cmpwi cr1, r11, RPN_PATTERN @@ -332,7 +333,7 @@ instruction_counter: DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_PROLOG_1 /* 0x300 is DataAccess exception, needed by bad_page_fault() */ - EXCEPTION_PROLOG_2 0x300 DataTLBError handle_dar_dsisr=1 + EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1 lwz r4, _DAR(r11) lwz r5, _DSISR(r11) andis. r10,r5,DSISR_NOHPTE@h @@ -351,7 +352,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ * support of breakpoints and such. Someday I will get around to * using them. 
*/ - START_EXCEPTION(0x1c00, DataBreakpoint) + START_EXCEPTION(INTERRUPT_DATA_BREAKPOINT_8xx, DataBreakpoint) EXCEPTION_PROLOG_0 handle_dar_dsisr=1 mfspr r11, SPRN_SRR0 cmplwi cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l @@ -364,7 +365,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ rfi 1: EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 0x1c00 DataBreakpoint handle_dar_dsisr=1 + EXCEPTION_PROLOG_2 INTERRUPT_DATA_BREAKPOINT_8xx DataBreakpoint handle_dar_dsisr=1 mfspr r4,SPRN_BAR stw r4,_DAR(r11) prepare_transfer_to_handler @@ -373,7 +374,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ b interrupt_return #ifdef CONFIG_PERF_EVENTS - START_EXCEPTION(0x1d00, InstructionBreakpoint) + START_EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, InstructionBreakpoint) mtspr SPRN_SPRG_SCRATCH0, r10 lwz r10, (instruction_counter - PAGE_OFFSET)@l(0) addi r10, r10, -1 @@ -384,7 +385,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ mfspr r10, SPRN_SPRG_SCRATCH0 rfi #else - EXCEPTION(0x1d00, Trap_1d, unknown_exception) + EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, Trap_1d, unknown_exception) #endif EXCEPTION(0x1e00, Trap_1e, unknown_exception) EXCEPTION(0x1f00, Trap_1f, unknown_exception) From 7fab639729ce4a0ecb3c528cd68b0c0598696ef9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 19 Apr 2021 15:48:10 +0000 Subject: [PATCH 259/302] powerpc/32s: Enhance readability of trap types This patch makes use of trap types in head_book3s_32.S Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bd80ace67757f489fc4ecdb76dd1a71511daba94.1618847273.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 6 ++++ arch/powerpc/kernel/head_book3s_32.S | 43 ++++++++++++++-------------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index cf2c5c3ae71692..8970990e3b08e6 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -27,6 +27,7 @@ #ifdef CONFIG_PPC_BOOK3S #define INTERRUPT_DOORBELL 0xa00 #define INTERRUPT_PERFMON 0xf00 +#define INTERRUPT_ALTIVEC_UNAVAIL 0xf20 #endif /* BookE/BookS/4xx/8xx */ @@ -57,6 +58,11 @@ #define INTERRUPT_DATA_BREAKPOINT_8xx 0x1c00 #define INTERRUPT_INST_BREAKPOINT_8xx 0x1d00 +/* 603 */ +#define INTERRUPT_INST_TLB_MISS_603 0x1000 +#define INTERRUPT_DATA_LOAD_TLB_MISS_603 0x1100 +#define INTERRUPT_DATA_STORE_TLB_MISS_603 0x1200 + #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 18f4ae163f34a5..065178f19a3d62 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -31,6 +31,7 @@ #include #include #include +#include #include "head_32.h" @@ -239,7 +240,7 @@ __secondary_hold_acknowledge: /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and putting it back to what it was (unknown_async_exception) when done. */ - EXCEPTION(0x100, Reset, unknown_async_exception) + EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, unknown_async_exception) /* Machine check */ /* @@ -255,7 +256,7 @@ __secondary_hold_acknowledge: * pointer when we take an exception from supervisor mode.) * -- paulus. */ - START_EXCEPTION(0x200, MachineCheck) + START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck) EXCEPTION_PROLOG_0 #ifdef CONFIG_PPC_CHRP mtspr SPRN_SPRG_SCRATCH2,r1 @@ -276,7 +277,7 @@ __secondary_hold_acknowledge: b interrupt_return /* Data access exception. 
*/ - START_EXCEPTION(0x300, DataAccess) + START_EXCEPTION(INTERRUPT_DATA_STORAGE, DataAccess) #ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION mtspr SPRN_SPRG_SCRATCH2,r10 @@ -297,7 +298,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) #endif 1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 0x300 DataAccess handle_dar_dsisr=1 + EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1 prepare_transfer_to_handler lwz r5, _DSISR(r11) andis. r0, r5, DSISR_DABRMATCH@h @@ -310,7 +311,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) /* Instruction access exception. */ - START_EXCEPTION(0x400, InstructionAccess) + START_EXCEPTION(INTERRUPT_INST_STORAGE, InstructionAccess) mtspr SPRN_SPRG_SCRATCH0,r10 mtspr SPRN_SPRG_SCRATCH1,r11 mfspr r10, SPRN_SPRG_THREAD @@ -330,7 +331,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) andi. r11, r11, MSR_PR EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 0x400 InstructionAccess + EXCEPTION_PROLOG_2 INTERRUPT_INST_STORAGE InstructionAccess andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ stw r5, _DSISR(r11) stw r12, _DAR(r11) @@ -339,19 +340,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) b interrupt_return /* External interrupt */ - EXCEPTION(0x500, HardwareInterrupt, do_IRQ) + EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ) /* Alignment exception */ - START_EXCEPTION(0x600, Alignment) - EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 + START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment) + EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1 prepare_transfer_to_handler bl alignment_exception REST_NVGPRS(r1) b interrupt_return /* Program check exception */ - START_EXCEPTION(0x700, ProgramCheck) - EXCEPTION_PROLOG 0x700 ProgramCheck + START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck) + EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck prepare_transfer_to_handler bl program_check_exception REST_NVGPRS(r1) @@ -367,7 +368,7 @@ BEGIN_FTR_SECTION */ b ProgramCheck END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) - EXCEPTION_PROLOG 0x800 FPUnavailable + EXCEPTION_PROLOG INTERRUPT_FP_UNAVAIL FPUnavailable beq 1f bl load_up_fpu /* if from user, just load it up */ b fast_exception_return @@ -379,16 +380,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) #endif /* Decrementer */ - EXCEPTION(0x900, Decrementer, timer_interrupt) + EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt) EXCEPTION(0xa00, Trap_0a, unknown_exception) EXCEPTION(0xb00, Trap_0b, unknown_exception) /* System call */ - START_EXCEPTION(0xc00, SystemCall) - SYSCALL_ENTRY 0xc00 + START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall) + SYSCALL_ENTRY INTERRUPT_SYSCALL - EXCEPTION(0xd00, SingleStep, single_step_exception) + EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception) EXCEPTION(0xe00, Trap_0e, unknown_exception) /* @@ -399,10 +400,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) * non-altivec kernel running on a machine with altivec just * by executing an altivec instruction. */ - START_EXCEPTION(0xf00, PerformanceMonitorTrap) + START_EXCEPTION(INTERRUPT_PERFMON, PerformanceMonitorTrap) b PerformanceMonitor - START_EXCEPTION(0xf20, AltiVecUnavailableTrap) + START_EXCEPTION(INTERRUPT_ALTIVEC_UNAVAIL, AltiVecUnavailableTrap) b AltiVecUnavailable __HEAD @@ -410,7 +411,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) * Handle TLB miss for instruction on 603/603e. * Note: we get an alternate set of r0 - r3 to use automatically. */ - . = 0x1000 + . 
= INTERRUPT_INST_TLB_MISS_603 InstructionTLBMiss: /* * r0: scratch @@ -476,7 +477,7 @@ InstructionAddressInvalid: /* * Handle TLB miss for DATA Load operation on 603/603e */ - . = 0x1100 + . = INTERRUPT_DATA_LOAD_TLB_MISS_603 DataLoadTLBMiss: /* * r0: scratch @@ -554,7 +555,7 @@ DataAddressInvalid: /* * Handle TLB miss for DATA Store on 603/603e */ - . = 0x1200 + . = INTERRUPT_DATA_STORE_TLB_MISS_603 DataStoreTLBMiss: /* * r0: scratch From e522331173ec9af563461e0fae534e83ce39e8e3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 19 Apr 2021 15:48:11 +0000 Subject: [PATCH 260/302] powerpc/irq: Enhance readability of trap types This patch makes use of trap types in irq.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f7f8c9f98c33eaea316755c7fef150d1d77e047d.1618847273.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 1 + arch/powerpc/kernel/irq.c | 13 +++++-------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 8970990e3b08e6..44cde2e129b883 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -23,6 +23,7 @@ #define INTERRUPT_INST_SEGMENT 0x480 #define INTERRUPT_TRACE 0xd00 #define INTERRUPT_H_DATA_STORAGE 0xe00 +#define INTERRUPT_HMI 0xe60 #define INTERRUPT_H_FAC_UNAVAIL 0xf80 #ifdef CONFIG_PPC_BOOK3S #define INTERRUPT_DOORBELL 0xa00 diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 893d3f8d6f4729..72cb45393ef295 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -142,7 +142,7 @@ void replay_soft_interrupts(void) */ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { local_paca->irq_happened &= ~PACA_IRQ_HMI; - regs.trap = 0xe60; + regs.trap = INTERRUPT_HMI; handle_hmi_exception(®s); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -150,7 +150,7 @@ void replay_soft_interrupts(void) if (local_paca->irq_happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; - regs.trap = 0x900; + regs.trap = INTERRUPT_DECREMENTER; timer_interrupt(®s); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -158,7 +158,7 @@ void replay_soft_interrupts(void) if (local_paca->irq_happened & PACA_IRQ_EE) { local_paca->irq_happened &= ~PACA_IRQ_EE; - regs.trap = 0x500; + regs.trap = INTERRUPT_EXTERNAL; do_IRQ(®s); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -166,10 +166,7 @@ void replay_soft_interrupts(void) if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { local_paca->irq_happened &= ~PACA_IRQ_DBELL; - if (IS_ENABLED(CONFIG_PPC_BOOK3E)) - regs.trap = 0x280; - else - regs.trap = 0xa00; + regs.trap = INTERRUPT_DOORBELL; doorbell_exception(®s); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -178,7 +175,7 @@ void replay_soft_interrupts(void) /* Book3E does not support soft-masking PMI interrupts */ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { local_paca->irq_happened &= ~PACA_IRQ_PMI; - regs.trap = 0xf00; + regs.trap = INTERRUPT_PERFMON; performance_monitor_exception(®s); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); From 693557ebf407a85ea400a0b501bb97687d8f4856 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Apr 2021 14:02:06 +0000 Subject: [PATCH 261/302] powerpc/inst: ppc_inst_as_u64() 
becomes ppc_inst_as_ulong() In order to simplify use on PPC32, change ppc_inst_as_u64() into ppc_inst_as_ulong() that returns the 32 bits instruction on PPC32. Will be used when porting OPTPROBES to PPC32. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/22cadf29620664b600b82026d2a72b8b23351777.1618927318.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/inst.h | 13 +++++++------ arch/powerpc/kernel/optprobes.c | 2 +- arch/powerpc/lib/code-patching.c | 2 +- arch/powerpc/xmon/xmon.c | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 19e18af2fac9d4..9646c63f74209f 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -147,13 +147,14 @@ static inline struct ppc_inst *ppc_inst_next(void *location, struct ppc_inst *va return location + ppc_inst_len(tmp); } -static inline u64 ppc_inst_as_u64(struct ppc_inst x) +static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x) { -#ifdef CONFIG_CPU_LITTLE_ENDIAN - return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x); -#else - return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x); -#endif + if (IS_ENABLED(CONFIG_PPC32)) + return ppc_inst_val(x); + else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) + return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x); + else + return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x); } #define PPC_INST_STR_LEN sizeof("00000000 00000000") diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 7f7cdbeacd1ac8..58fdb9f66e0fdf 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -264,7 +264,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * 3. load instruction to be emulated into relevant register, and */ temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); - patch_imm64_load_insns(ppc_inst_as_u64(temp), 4, buff + TMPL_INSN_IDX); + patch_imm64_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 65aec4d6d9ba9c..870b30d9be2f85 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -26,7 +26,7 @@ static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr __put_kernel_nofault(patch_addr, &val, u32, failed); } else { - u64 val = ppc_inst_as_u64(instr); + u64 val = ppc_inst_as_ulong(instr); __put_kernel_nofault(patch_addr, &val, u64, failed); } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index a619b9ed8458af..ff2b92bfeedccf 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2953,7 +2953,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, if (!ppc_inst_prefixed(inst)) dump_func(ppc_inst_val(inst), adr); else - dump_func(ppc_inst_as_u64(inst), adr); + dump_func(ppc_inst_as_ulong(inst), adr); printf("\n"); } return adr - first_adr; From eacf4c0202654adfa94bbb17b5c5c77c0be14af8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Apr 2021 14:02:07 +0000 Subject: [PATCH 262/302] powerpc: Enable OPTPROBES on PPC32 For that, create a 32 bits version of patch_imm64_load_insns() and create a patch_imm_load_insns() which calls patch_imm32_load_insns() on PPC32 and patch_imm64_load_insns() on PPC64. Adapt optprobes_head.S for PPC32. 
Use PPC_LL/PPC_STL macros instead of raw ld/std, opt out things linked to paca and use stmw/lmw to save/restore registers. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bad58c66859b2a475c0ad516b53164ae3b4853cd.1618927318.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 2 +- arch/powerpc/kernel/optprobes.c | 24 ++++++++-- arch/powerpc/kernel/optprobes_head.S | 65 +++++++++++++++------------- 3 files changed, 56 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 475d77a6ebbe64..d2e31a578e26ef 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -229,7 +229,7 @@ config PPC select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC64 && PPC_BOOK3S && SMP - select HAVE_OPTPROBES if PPC64 + select HAVE_OPTPROBES select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 58fdb9f66e0fdf..cdf87086fa33a0 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -141,11 +141,21 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) } } +static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +{ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_RAW_LIS(reg, IMM_H(val)))); + addr++; + + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val)))); +} + /* * Generate instructions to load provided immediate 64-bit value * to register 'reg' and patch these instructions at 'addr'. */ -static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr) { /* lis reg,(op)@highest */ patch_instruction((struct ppc_inst *)addr, @@ -177,6 +187,14 @@ static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t * ___PPC_RS(reg) | (val & 0xffff))); } +static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +{ + if (IS_ENABLED(CONFIG_PPC64)) + patch_imm64_load_insns(val, reg, addr); + else + patch_imm32_load_insns(val, reg, addr); +} + int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { struct ppc_inst branch_op_callback, branch_emulate_step, temp; @@ -230,7 +248,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * Fixup the template with instructions to: * 1. load the address of the actual probepoint */ - patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); + patch_imm_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); /* * 2. branch to optimized_callback() and emulate_step() @@ -264,7 +282,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * 3. load instruction to be emulated into relevant register, and */ temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); - patch_imm64_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); + patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); /* * 4. 
branch back from trampoline diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index ff8ba4d3824d52..19ea3312403ca3 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -9,6 +9,16 @@ #include #include +#ifdef CONFIG_PPC64 +#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base) +#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base) +#define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop +#else +#define SAVE_30GPRS(base) stmw r2, GPR2(base) +#define REST_30GPRS(base) lmw r2, GPR2(base) +#define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop +#endif + #define OPT_SLOT_SIZE 65536 .balign 4 @@ -30,39 +40,41 @@ optinsn_slot: .global optprobe_template_entry optprobe_template_entry: /* Create an in-memory pt_regs */ - stdu r1,-INT_FRAME_SIZE(r1) + PPC_STLU r1,-INT_FRAME_SIZE(r1) SAVE_GPR(0,r1) /* Save the previous SP into stack */ addi r0,r1,INT_FRAME_SIZE - std r0,GPR1(r1) - SAVE_10GPRS(2,r1) - SAVE_10GPRS(12,r1) - SAVE_10GPRS(22,r1) + PPC_STL r0,GPR1(r1) + SAVE_30GPRS(r1) /* Save SPRS */ mfmsr r5 - std r5,_MSR(r1) + PPC_STL r5,_MSR(r1) li r5,0x700 - std r5,_TRAP(r1) + PPC_STL r5,_TRAP(r1) li r5,0 - std r5,ORIG_GPR3(r1) - std r5,RESULT(r1) + PPC_STL r5,ORIG_GPR3(r1) + PPC_STL r5,RESULT(r1) mfctr r5 - std r5,_CTR(r1) + PPC_STL r5,_CTR(r1) mflr r5 - std r5,_LINK(r1) + PPC_STL r5,_LINK(r1) mfspr r5,SPRN_XER - std r5,_XER(r1) + PPC_STL r5,_XER(r1) mfcr r5 - std r5,_CCR(r1) + PPC_STL r5,_CCR(r1) +#ifdef CONFIG_PPC64 lbz r5,PACAIRQSOFTMASK(r13) std r5,SOFTE(r1) +#endif /* * We may get here from a module, so load the kernel TOC in r2. * The original TOC gets restored when pt_regs is restored * further below. */ +#ifdef CONFIG_PPC64 ld r2,PACATOC(r13) +#endif .global optprobe_template_op_address optprobe_template_op_address: @@ -70,11 +82,8 @@ optprobe_template_op_address: * Parameters to optimized_callback(): * 1. optimized_kprobe structure in r3 */ - nop - nop - nop - nop - nop + TEMPLATE_FOR_IMM_LOAD_INSNS + /* 2. pt_regs pointer in r4 */ addi r4,r1,STACK_FRAME_OVERHEAD @@ -92,11 +101,7 @@ optprobe_template_call_handler: .global optprobe_template_insn optprobe_template_insn: /* 2, Pass instruction to be emulated in r4 */ - nop - nop - nop - nop - nop + TEMPLATE_FOR_IMM_LOAD_INSNS .global optprobe_template_call_emulate optprobe_template_call_emulate: @@ -107,20 +112,18 @@ optprobe_template_call_emulate: * All done. * Now, restore the registers... */ - ld r5,_MSR(r1) + PPC_LL r5,_MSR(r1) mtmsr r5 - ld r5,_CTR(r1) + PPC_LL r5,_CTR(r1) mtctr r5 - ld r5,_LINK(r1) + PPC_LL r5,_LINK(r1) mtlr r5 - ld r5,_XER(r1) + PPC_LL r5,_XER(r1) mtxer r5 - ld r5,_CCR(r1) + PPC_LL r5,_CCR(r1) mtcr r5 REST_GPR(0,r1) - REST_10GPRS(2,r1) - REST_10GPRS(12,r1) - REST_10GPRS(22,r1) + REST_30GPRS(r1) /* Restore the previous SP */ addi r1,r1,INT_FRAME_SIZE From 92d9d61be519f32f16c07602db5bcbe30a0836fe Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 7 Apr 2021 10:28:16 +0530 Subject: [PATCH 263/302] powerpc/mce: save ignore_event flag unconditionally for UE When we hit an UE while using machine check safe copy routines, ignore_event flag is set and the event is ignored by mce handler, And the flag is also saved for defered handling and printing of mce event information, But as of now saving of this flag is done on checking if the effective address is provided or physical address is calculated, which is not right. 
Save ignore_event flag regardless of whether the effective address is provided or physical address is calculated. Without this change following log is seen, when the event is to be ignored. [ 512.971365] MCE: CPU1: machine check (Severe) UE Load/Store [Recovered] [ 512.971509] MCE: CPU1: NIP: [c0000000000b67c0] memcpy+0x40/0x90 [ 512.971655] MCE: CPU1: Initiator CPU [ 512.971739] MCE: CPU1: Unknown [ 512.972209] MCE: CPU1: machine check (Severe) UE Load/Store [Recovered] [ 512.972334] MCE: CPU1: NIP: [c0000000000b6808] memcpy+0x88/0x90 [ 512.972456] MCE: CPU1: Initiator CPU [ 512.972534] MCE: CPU1: Unknown Signed-off-by: Ganesh Goudar Reviewed-by: Santosh Sivaraj Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210407045816.352276-1-ganeshgr@linux.ibm.com --- arch/powerpc/kernel/mce.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 6aa6b1cda1edda..9a3c2a84a2acab 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -131,6 +131,8 @@ void save_mce_event(struct pt_regs *regs, long handled, * Populate the mce error_type and type-specific error_type. */ mce_set_error_info(mce, mce_err); + if (mce->error_type == MCE_ERROR_TYPE_UE) + mce->u.ue_error.ignore_event = mce_err->ignore_event; if (!addr) return; @@ -159,7 +161,6 @@ void save_mce_event(struct pt_regs *regs, long handled, if (phys_addr != ULONG_MAX) { mce->u.ue_error.physical_address_provided = true; mce->u.ue_error.physical_address = phys_addr; - mce->u.ue_error.ignore_event = mce_err->ignore_event; machine_check_ue_event(mce); } } From f56607e85ee38f2a5bb7096e24e2d40f35d714f9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 31 Mar 2021 13:59:17 +0000 Subject: [PATCH 264/302] selftests/timens: Fix gettime_perf to work on powerpc On powerpc: - VDSO library is named linux-vdso32.so.1 or linux-vdso64.so.1 - clock_gettime is named __kernel_clock_gettime() Ensure gettime_perf tries these names before giving up. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/469f37ab91984309eb68c0fb47e8438cdf5b6463.1617198956.git.christophe.leroy@csgroup.eu --- tools/testing/selftests/timens/gettime_perf.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c index 7bf841a3967baf..6b13dc27772412 100644 --- a/tools/testing/selftests/timens/gettime_perf.c +++ b/tools/testing/selftests/timens/gettime_perf.c @@ -25,12 +25,20 @@ static void fill_function_pointers(void) if (!vdso) vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-vdso32.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-vdso64.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); if (!vdso) { pr_err("[WARN]\tfailed to find vDSO\n"); return; } vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); + if (!vdso_clock_gettime) + vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__kernel_clock_gettime"); if (!vdso_clock_gettime) pr_err("Warning: failed to find clock_gettime in vDSO\n"); From 867e762480f4ad4106b16299a373fa23eccf5b4b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 22 Jan 2021 07:15:03 +0000 Subject: [PATCH 265/302] powerpc/32: Use r2 in wrtspr() instead of r0 wrtspr() is a function to write an arbitrary value in a special register. It is used on 8xx to write to SPRN_NRI, SPRN_EID and SPRN_EIE. 
Writing any value to one of those will play with MSR EE and MSR RI regardless of that value. r0 is used many places in the generated code and using r0 for that creates an unnecessary dependency of this instruction with preceding ones using r0 in a few places in vmlinux. r2 is most likely the most stable register as it contains the pointer to 'current'. Using r2 instead of r0 avoids that unnecessary dependency. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/69f9968f4b592fefda55227f0f7430ea612cc950.1611299687.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 9086a2644c89b9..7c81d3e563b28e 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1394,8 +1394,7 @@ static inline void mtmsr_isync(unsigned long val) : "r" ((unsigned long)(v)) \ : "memory") #endif -#define wrtspr(rn) asm volatile("mtspr " __stringify(rn) ",0" : \ - : : "memory") +#define wrtspr(rn) asm volatile("mtspr " __stringify(rn) ",2" : : : "memory") static inline void wrtee(unsigned long val) { From d228cc4969663623e6b5a749b02e4619352a0a8d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 12 Apr 2021 11:44:16 +0000 Subject: [PATCH 266/302] powerpc/ebpf32: Fix comment on BPF_ALU{64} | BPF_LSH | BPF_K Replace <<== by <<= Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/34d12a4f75cb8b53a925fada5e7ddddd3b145203.1618227846.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit_comp32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 003843273b43ee..ca6fe1583460d5 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -559,12 +559,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0)); break; - case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */ + case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */ if (!imm) break; EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm)); break; - case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */ + case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */ if (imm < 0) return -EINVAL; if (!imm) From e7de0023e1232f42a10ef6af03352538cc27eaf6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 12 Apr 2021 11:44:17 +0000 Subject: [PATCH 267/302] powerpc/ebpf32: Rework 64 bits shifts to avoid tests and branches Re-implement BPF_ALU64 | BPF_{LSH/RSH/ARSH} | BPF_X with branchless implementation copied from misc_32.S. 
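The branchless scheme works because the 32-bit shift instructions only look
at the low 6 bits of the shift amount and return 0 for amounts 32 to 63.
As a C-level sketch of the idea for the left-shift case (not code from this
patch; the logical right shift is the mirror image):

	/* slw()/srw() model the slw/srw instructions: the shift amount is
	 * taken modulo 64 and amounts 32..63 give a zero result. */
	static u32 slw(u32 x, u32 n) { n &= 63; return n < 32 ? x << n : 0; }
	static u32 srw(u32 x, u32 n) { n &= 63; return n < 32 ? x >> n : 0; }

	/* dst lives in the (hi:lo) register pair, n is 0..63 */
	hi = slw(hi, n) | srw(lo, 32 - n) | slw(lo, n + 32);
	lo = slw(lo, n);

For n < 32 the last term is zero; for n >= 32 the first term is zero and the
remaining two terms OR together to lo << (n - 32), since n + 32 wraps modulo
64. Either way, no test on the shift amount is needed.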
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/03167350b05b2fe8b741e53363ee37709d0f878d.1618227846.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit_comp32.c | 39 +++++++++++++++---------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index ca6fe1583460d5..ef21b09df76ed8 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -548,16 +548,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); break; case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ - EMIT(PPC_RAW_ADDIC_DOT(__REG_R0, src_reg, -32)); - PPC_BCC_SHORT(COND_LT, (ctx->idx + 4) * 4); - EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg, __REG_R0)); - EMIT(PPC_RAW_LI(dst_reg, 0)); - PPC_JMP((ctx->idx + 6) * 4); + bpf_set_seen_register(ctx, tmp_reg); EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0)); - EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg)); EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg)); break; case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */ if (!imm) @@ -585,16 +584,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); break; case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ - EMIT(PPC_RAW_ADDIC_DOT(__REG_R0, src_reg, -32)); - PPC_BCC_SHORT(COND_LT, (ctx->idx + 4) * 4); - EMIT(PPC_RAW_SRW(dst_reg, dst_reg_h, __REG_R0)); - EMIT(PPC_RAW_LI(dst_reg_h, 0)); - PPC_JMP((ctx->idx + 6) * 4); - EMIT(PPC_RAW_SUBFIC(0, src_reg, 32)); + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); - EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg)); EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); break; case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ if (!imm) @@ -622,16 +620,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg, src_reg)); break; case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ - EMIT(PPC_RAW_ADDIC_DOT(__REG_R0, src_reg, -32)); - PPC_BCC_SHORT(COND_LT, (ctx->idx + 4) * 4); - EMIT(PPC_RAW_SRAW(dst_reg, dst_reg_h, __REG_R0)); - EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, 31)); - PPC_JMP((ctx->idx + 6) * 4); - EMIT(PPC_RAW_SUBFIC(0, src_reg, 32)); + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); - EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_RLWINM(__REG_R0, tmp_reg, 0, 26, 26)); + EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg)); + EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, __REG_R0)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, 
tmp_reg)); break; case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */ if (!imm) From ee7c3ec3b4b1222b30272624897826bc40d79bc5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 12 Apr 2021 11:44:18 +0000 Subject: [PATCH 268/302] powerpc/ebpf32: Use standard function call for functions within 32M distance If the target of a function call is within 32 Mbytes distance, use a standard function call with 'bl' instead of the 'lis/ori/mtlr/blrl' sequence. In the first pass, no memory has been allocated yet and the code position is not known yet (image pointer is NULL). This pass is there to calculate the amount of memory to allocate for the EBPF code, so assume the 4 instructions sequence is required, so that enough memory is allocated. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/74944a1e3e5cfecc141e440a6ccd37920e186b70.1618227846.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 1 + arch/powerpc/net/bpf_jit.h | 3 +++ arch/powerpc/net/bpf_jit_comp32.c | 16 +++++++++++----- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 5b60020dc1f43c..ac41776661e963 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -265,6 +265,7 @@ #define PPC_INST_ORI 0x60000000 #define PPC_INST_ORIS 0x64000000 #define PPC_INST_BRANCH 0x48000000 +#define PPC_INST_BL 0x48000001 #define PPC_INST_BRANCH_COND 0x40800000 /* Prefixes */ diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 776abef4d2a0a7..99fad093f43ec1 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -26,6 +26,9 @@ /* Long jump; (unconditional 'branch') */ #define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ (((dest) - (ctx->idx * 4)) & 0x03fffffc)) +/* blr; (unconditional 'branch' with link) to absolute address */ +#define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \ + (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) /* "cond" here covers BO:BI fields. */ #define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \ (((cond) & 0x3ff) << 16) | \ diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index ef21b09df76ed8..bbb16099e8c7fa 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -187,11 +187,17 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { - /* Load function address into r0 */ - EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func))); - EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func))); - EMIT(PPC_RAW_MTLR(__REG_R0)); - EMIT(PPC_RAW_BLRL()); + s32 rel = (s32)func - (s32)(image + ctx->idx); + + if (image && rel < 0x2000000 && rel >= -0x2000000) { + PPC_BL_ABS(func); + } else { + /* Load function address into r0 */ + EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func))); + EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func))); + EMIT(PPC_RAW_MTLR(__REG_R0)); + EMIT(PPC_RAW_BLRL()); + } } static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) From 6ac7897f08e04b47df3955d7691652e9d12d4068 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 14 Apr 2021 13:08:40 +0000 Subject: [PATCH 269/302] powerpc: Remove probe_user_read_inst() Its name comes from former probe_user_read() function. That function is now called copy_from_user_nofault(). 
probe_user_read_inst() uses copy_from_user_nofault() to read only a few bytes. It is suboptimal. It does the same as get_user_inst() but in addition disables page faults. But on the other hand, it is not used for the time being. So remove it for now. If one day it is really needed, we can give it a new name more in line with today's naming, and implement it using get_user_inst() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5f6f82572242a59bfee1e19a71194d8f7ef5fca4.1618405715.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/inst.h | 3 --- arch/powerpc/lib/inst.c | 31 ------------------------------- 2 files changed, 34 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 9646c63f74209f..b7709470e8e9b1 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -176,9 +176,6 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins __str; \ }) -int probe_user_read_inst(struct ppc_inst *inst, - struct ppc_inst __user *nip); - int probe_kernel_read_inst(struct ppc_inst *inst, struct ppc_inst *src); diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index 9cc17eb62462a6..c57b3548de37bc 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -9,24 +9,6 @@ #include #ifdef CONFIG_PPC64 -int probe_user_read_inst(struct ppc_inst *inst, - struct ppc_inst __user *nip) -{ - unsigned int val, suffix; - int err; - - err = copy_from_user_nofault(&val, nip, sizeof(val)); - if (err) - return err; - if (get_op(val) == OP_PREFIX) { - err = copy_from_user_nofault(&suffix, (void __user *)nip + 4, 4); - *inst = ppc_inst_prefix(val, suffix); - } else { - *inst = ppc_inst(val); - } - return err; -} - int probe_kernel_read_inst(struct ppc_inst *inst, struct ppc_inst *src) { @@ -45,19 +27,6 @@ int probe_kernel_read_inst(struct ppc_inst *inst, return err; } #else /* !CONFIG_PPC64 */ -int probe_user_read_inst(struct ppc_inst *inst, - struct ppc_inst __user *nip) -{ - unsigned int val; - int err; - - err = copy_from_user_nofault(&val, nip, sizeof(val)); - if (!err) - *inst = ppc_inst(val); - - return err; -} - int probe_kernel_read_inst(struct ppc_inst *inst, struct ppc_inst *src) { From 6449078d50111c839bb7156c3b99b9def80eed42 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 14 Apr 2021 13:08:41 +0000 Subject: [PATCH 270/302] powerpc: Make probe_kernel_read_inst() common to PPC32 and PPC64 We have two independant versions of probe_kernel_read_inst(), one for PPC32 and one for PPC64. The PPC32 is identical to the first part of the PPC64 version. The remaining part of PPC64 version is not relevant for PPC32, but not contradictory, so we can easily have a common function with the PPC64 part opted out via a IS_ENABLED(CONFIG_PPC64). The only need is to add a version of ppc_inst_prefix() for PPC32. 
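The IS_ENABLED() approach is worth a quick illustration: the disabled branch is still parsed and type-checked before being discarded as dead code, which is exactly why a PPC32 stub of ppc_inst_prefix() is needed at all. A minimal user-space sketch of the idea (the macros below are stand-ins for this sketch, not the kernel definitions):

#include <stdio.h>

/* Stand-ins for the Kconfig-generated macros; in the kernel,
 * IS_ENABLED(CONFIG_FOO) expands to 1 or 0 from the build configuration. */
#define CONFIG_PPC64 0                  /* pretend this is a PPC32 build */
#define IS_ENABLED(option) (option)

#define OP_PREFIX 1

static int instr_words(unsigned int val)
{
        /* The condition is a compile-time constant, so the compiler drops
         * the prefixed-instruction branch on PPC32 - but it must still
         * compile, hence the ppc_inst_prefix() stub in the real patch. */
        if (IS_ENABLED(CONFIG_PPC64) && (val >> 26) == OP_PREFIX)
                return 2;               /* prefixed: opcode word plus suffix */
        return 1;                       /* plain 32-bit instruction */
}

int main(void)
{
        printf("%d\n", instr_words(0x04000000));  /* 1 on this pretend PPC32 build */
        return 0;
}

With the stub simply returning a plain ppc_inst(), the common function compiles on both, and the PPC64-only suffix handling costs nothing on PPC32.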
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f7b9dfddef3b3760182c7e5466356c121a293dc9.1618405715.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/inst.h | 2 ++ arch/powerpc/lib/inst.c | 17 +---------------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index b7709470e8e9b1..539117b0a2dc3a 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -102,6 +102,8 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) #define ppc_inst(x) ((struct ppc_inst){ .val = x }) +#define ppc_inst_prefix(x, y) ppc_inst(x) + static inline bool ppc_inst_prefixed(struct ppc_inst x) { return false; diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index c57b3548de37bc..0dff3ac2d45fdd 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -8,7 +8,6 @@ #include #include -#ifdef CONFIG_PPC64 int probe_kernel_read_inst(struct ppc_inst *inst, struct ppc_inst *src) { @@ -18,7 +17,7 @@ int probe_kernel_read_inst(struct ppc_inst *inst, err = copy_from_kernel_nofault(&val, src, sizeof(val)); if (err) return err; - if (get_op(val) == OP_PREFIX) { + if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); *inst = ppc_inst_prefix(val, suffix); } else { @@ -26,17 +25,3 @@ int probe_kernel_read_inst(struct ppc_inst *inst, } return err; } -#else /* !CONFIG_PPC64 */ -int probe_kernel_read_inst(struct ppc_inst *inst, - struct ppc_inst *src) -{ - unsigned int val; - int err; - - err = copy_from_kernel_nofault(&val, src, sizeof(val)); - if (!err) - *inst = ppc_inst(val); - - return err; -} -#endif /* CONFIG_PPC64 */ From 41d6cf68b5f611934bcc6a7d4a1a2d9bfd04b420 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 14 Apr 2021 13:08:42 +0000 Subject: [PATCH 271/302] powerpc: Rename probe_kernel_read_inst() When probe_kernel_read_inst() was created, it was to mimic probe_kernel_read() function. Since then, probe_kernel_read() has been renamed copy_from_kernel_nofault(). Rename probe_kernel_read_inst() into copy_inst_from_kernel_nofault(). 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b783d1f7cdb8914992384a669a2af57051b6bdcf.1618405715.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/inst.h | 3 +-- arch/powerpc/kernel/align.c | 2 +- arch/powerpc/kernel/trace/ftrace.c | 22 +++++++++++----------- arch/powerpc/lib/inst.c | 3 +-- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 539117b0a2dc3a..268d3bd073c8ac 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -178,7 +178,6 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins __str; \ }) -int probe_kernel_read_inst(struct ppc_inst *inst, - struct ppc_inst *src); +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src); #endif /* _ASM_POWERPC_INST_H */ diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 938db36864ddc2..bbb4181621ddca 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -305,7 +305,7 @@ int fix_alignment(struct pt_regs *regs) int r, type; if (is_kernel_addr(regs->nip)) - r = probe_kernel_read_inst(&instr, (void *)regs->nip); + r = copy_inst_from_kernel_nofault(&instr, (void *)regs->nip); else r = __get_user_instr(instr, (void __user *)regs->nip); diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 42761ebec9f755..ffe9537195aa33 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -68,7 +68,7 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) */ /* read the text we want to modify */ - if (probe_kernel_read_inst(&replaced, (void *)ip)) + if (copy_inst_from_kernel_nofault(&replaced, (void *)ip)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -130,7 +130,7 @@ __ftrace_make_nop(struct module *mod, struct ppc_inst op, pop; /* read where this goes */ - if (probe_kernel_read_inst(&op, (void *)ip)) { + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } @@ -164,7 +164,7 @@ __ftrace_make_nop(struct module *mod, /* When using -mkernel_profile there is no load to jump over */ pop = ppc_inst(PPC_INST_NOP); - if (probe_kernel_read_inst(&op, (void *)(ip - 4))) { + if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) { pr_err("Fetching instruction at %lx failed.\n", ip - 4); return -EFAULT; } @@ -197,7 +197,7 @@ __ftrace_make_nop(struct module *mod, * Check what is in the next instruction. We can see ld r2,40(r1), but * on first pass after boot we will see mflr r0. 
*/ - if (probe_kernel_read_inst(&op, (void *)(ip + 4))) { + if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) { pr_err("Fetching op failed.\n"); return -EFAULT; } @@ -349,7 +349,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) return -1; /* New trampoline -- read where this goes */ - if (probe_kernel_read_inst(&op, (void *)tramp)) { + if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) { pr_debug("Fetching opcode failed.\n"); return -1; } @@ -399,7 +399,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) struct ppc_inst op; /* Read where this goes */ - if (probe_kernel_read_inst(&op, (void *)ip)) { + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } @@ -526,10 +526,10 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) struct module *mod = rec->arch.mod; /* read where this goes */ - if (probe_kernel_read_inst(op, ip)) + if (copy_inst_from_kernel_nofault(op, ip)) return -EFAULT; - if (probe_kernel_read_inst(op + 1, ip + 4)) + if (copy_inst_from_kernel_nofault(op + 1, ip + 4)) return -EFAULT; if (!expected_nop_sequence(ip, op[0], op[1])) { @@ -592,7 +592,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) unsigned long ip = rec->ip; /* read where this goes */ - if (probe_kernel_read_inst(&op, (void *)ip)) + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) return -EFAULT; /* It should be pointing to a nop */ @@ -648,7 +648,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) } /* Make sure we have a nop */ - if (probe_kernel_read_inst(&op, ip)) { + if (copy_inst_from_kernel_nofault(&op, ip)) { pr_err("Unable to read ftrace location %p\n", ip); return -EFAULT; } @@ -726,7 +726,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } /* read where this goes */ - if (probe_kernel_read_inst(&op, (void *)ip)) { + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index 0dff3ac2d45fdd..e554d1357f2f15 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -8,8 +8,7 @@ #include #include -int probe_kernel_read_inst(struct ppc_inst *inst, - struct ppc_inst *src) +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) { unsigned int val, suffix; int err; From 39352430aaa05fbe4ba710231c70b334513078f2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 14 Apr 2021 13:08:43 +0000 Subject: [PATCH 272/302] powerpc: Move copy_inst_from_kernel_nofault() When probe_kernel_read_inst() was created, there was no good place to put it, so a file called lib/inst.c was dedicated for it. Since then, probe_kernel_read_inst() has been renamed copy_inst_from_kernel_nofault(). And mm/maccess.h didn't exist at that time. Today, mm/maccess.h is related to copy_from_kernel_nofault(). 
Move copy_inst_from_kernel_nofault() into mm/maccess.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9655d8957313906b77b8db5700a0e33ce06f45e5.1618405715.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/Makefile | 2 +- arch/powerpc/lib/inst.c | 26 -------------------------- arch/powerpc/mm/maccess.c | 21 +++++++++++++++++++++ 3 files changed, 22 insertions(+), 27 deletions(-) delete mode 100644 arch/powerpc/lib/inst.c diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index d4efc182662a8c..f2c690ee75d1a4 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -16,7 +16,7 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o test_code-patching.o +obj-y += alloc.o code-patching.o feature-fixups.o pmem.o test_code-patching.o ifndef CONFIG_KASAN obj-y += string.o memcmp_$(BITS).o diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c deleted file mode 100644 index e554d1357f2f15..00000000000000 --- a/arch/powerpc/lib/inst.c +++ /dev/null @@ -1,26 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2020, IBM Corporation. - */ - -#include -#include -#include -#include - -int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) -{ - unsigned int val, suffix; - int err; - - err = copy_from_kernel_nofault(&val, src, sizeof(val)); - if (err) - return err; - if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { - err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); - *inst = ppc_inst_prefix(val, suffix); - } else { - *inst = ppc_inst(val); - } - return err; -} diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c index fa9a7a718fc670..a3c30a8840768a 100644 --- a/arch/powerpc/mm/maccess.c +++ b/arch/powerpc/mm/maccess.c @@ -3,7 +3,28 @@ #include #include +#include +#include +#include + bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return is_kernel_addr((unsigned long)unsafe_src); } + +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) +{ + unsigned int val, suffix; + int err; + + err = copy_from_kernel_nofault(&val, src, sizeof(val)); + if (err) + return err; + if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { + err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return err; +} From 2e341f56a16a71f240c87ec69711aad0d95a704c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 21 Apr 2021 22:54:01 +1000 Subject: [PATCH 273/302] powerpc/fadump: Fix sparse warnings Sparse says: arch/powerpc/kernel/fadump.c:48:16: warning: symbol 'fadump_kobj' was not declared. Should it be static? arch/powerpc/kernel/fadump.c:55:27: warning: symbol 'crash_mrange_info' was not declared. Should it be static? arch/powerpc/kernel/fadump.c:61:27: warning: symbol 'reserved_mrange_info' was not declared. Should it be static? arch/powerpc/kernel/fadump.c:83:12: warning: symbol 'fadump_cma_init' was not declared. Should it be static? And indeed none of them are used outside this file, they can all be made static. Also fadump_kobj needs to be moved inside the ifdef where it's used. 
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210421125402.1955013-1-mpe@ellerman.id.au --- arch/powerpc/kernel/fadump.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 000e3b7f3fca56..b990075285f57c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -45,22 +45,21 @@ static struct fw_dump fw_dump; static void __init fadump_reserve_crash_area(u64 base); -struct kobject *fadump_kobj; - #ifndef CONFIG_PRESERVE_FA_DUMP +static struct kobject *fadump_kobj; + static atomic_t cpus_in_fadump; static DEFINE_MUTEX(fadump_mutex); -struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; +static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ #define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ sizeof(struct fadump_memory_range)) static struct fadump_memory_range rngs[RESERVED_RNGS_CNT]; -struct fadump_mrange_info reserved_mrange_info = { "reserved", rngs, - RESERVED_RNGS_SZ, 0, - RESERVED_RNGS_CNT, true }; +static struct fadump_mrange_info +reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true }; static void __init early_init_dt_scan_reserved_ranges(unsigned long node); @@ -80,7 +79,7 @@ static struct cma *fadump_cma; * But for some reason even if it fails we still have the memory reservation * with us and we can still continue doing fadump. */ -int __init fadump_cma_init(void) +static int __init fadump_cma_init(void) { unsigned long long base, size; int rc; From d936f8182e1bd18f5e9e6c5e8d8b69261200ca96 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 21 Apr 2021 22:54:02 +1000 Subject: [PATCH 274/302] powerpc/powernv: Fix type of opal_mpipl_query_tag() addr argument opal_mpipl_query_tag() takes a pointer to a 64-bit value, which firmware writes a value to. As OPAL is traditionally big endian this value will be big endian. This can be confirmed by looking at the implementation in skiboot: static uint64_t opal_mpipl_query_tag(enum opal_mpipl_tags tag, __be64 *tag_val) { ... *tag_val = cpu_to_be64(opal_mpipl_tags[tag]); return OPAL_SUCCESS; } Fix the declaration to annotate that the value is big endian. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210421125402.1955013-2-mpe@ellerman.id.au --- arch/powerpc/include/asm/opal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 9986ac34b8e224..c76157237e22ba 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -307,7 +307,7 @@ int opal_secvar_enqueue_update(const char *key, uint64_t key_len, u8 *data, s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size); s64 opal_mpipl_register_tag(enum opal_mpipl_tags tag, u64 addr); -s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, u64 *addr); +s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, __be64 *addr); s64 opal_signal_system_reset(s32 cpu); s64 opal_quiesce(u64 shutdown_type, s32 cpu); From 7d946276570755d6b53d29bd100271f18cb8bf95 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 20 Apr 2021 14:22:09 +1000 Subject: [PATCH 275/302] powerpc/64s: Add FA_DUMP to defconfig FA_DUMP (Firmware Assisted Dump) is a powerpc only feature that should be enabled in our defconfig to get some build / test coverage. 
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210420042209.1641634-1-mpe@ellerman.id.au --- arch/powerpc/configs/ppc64_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 4f05a6652478d0..72b235ef6f3b2b 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -50,6 +50,7 @@ CONFIG_PPC_TRANSACTIONAL_MEM=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_CRASH_DUMP=y +CONFIG_FA_DUMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_PPC_64K_PAGES=y CONFIG_SCHED_SMT=y From 389586333c0229a4fbc5c1a7f89148d141293682 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 21 Apr 2021 14:06:47 -0700 Subject: [PATCH 276/302] powerpc: make ALTIVEC select PPC_FPU On a kernel config with ALTIVEC=y and PPC_FPU not set/enabled, there are build errors: drivers/cpufreq/pmac32-cpufreq.c:262:2: error: implicit declaration of function 'enable_kernel_fp' [-Werror,-Wimplicit-function-declaration] enable_kernel_fp(); ../arch/powerpc/lib/sstep.c: In function 'do_vec_load': ../arch/powerpc/lib/sstep.c:637:3: error: implicit declaration of function 'put_vr' [-Werror=implicit-function-declaration] 637 | put_vr(rn, &u.v); | ^~~~~~ ../arch/powerpc/lib/sstep.c: In function 'do_vec_store': ../arch/powerpc/lib/sstep.c:660:3: error: implicit declaration of function 'get_vr'; did you mean 'get_oc'? [-Werror=implicit-function-declaration] 660 | get_vr(rn, &u.v); | ^~~~~~ In theory ALTIVEC is independent of PPC_FPU but in practice nobody is going to build such a machine, so make ALTIVEC require PPC_FPU by selecting it. Reported-by: kernel test robot Signed-off-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210421210647.20836-1-rdunlap@infradead.org --- arch/powerpc/platforms/Kconfig.cputype | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9240743caefc27..e4b05667686eb5 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -308,6 +308,7 @@ config PHYS_64BIT config ALTIVEC bool "AltiVec Support" depends on PPC_BOOK3S_32 || PPC_BOOK3S_64 || (PPC_E500MC && PPC64) + select PPC_FPU help This option enables kernel support for the Altivec extensions to the PowerPC processor. The kernel currently supports saving and restoring From 9ccba66d4d2aff9a3909aa77d57ea8b7cc166f3c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Apr 2021 13:32:48 +0000 Subject: [PATCH 277/302] powerpc/64: Fix the definition of the fixmap area At the time being, the fixmap area is defined at the top of the address space or just below KASAN. This definition is not valid for PPC64. For PPC64, use the top of the I/O space. Because of circular dependencies, it is not possible to include asm/fixmap.h in asm/book3s/64/pgtable.h , so define a fixed size AREA at the top of the I/O space for fixmap and ensure during build that the size is big enough. 
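The "ensure during build that the size is big enough" part is the usual compile-time assertion pattern: reserve a fixed window, and fail the build the day the real requirement outgrows it. A stand-alone sketch of the same idea with C11 _Static_assert (all sizes below are made-up for the sketch; the kernel uses BUILD_BUG_ON() in __set_fixmap(), as the diff shows):

#include <stdio.h>

#define SZ_32M          (32UL * 1024 * 1024)
#define FIXADDR_SIZE    SZ_32M                  /* fixed window reserved at the top of the I/O space */
#define PAGE_SIZE       (64UL * 1024)
#define NR_FIXMAP_SLOTS 128UL                   /* made-up slot count for the sketch */
#define __FIXADDR_SIZE  (NR_FIXMAP_SLOTS * PAGE_SIZE)

/* Same spirit as the kernel's BUILD_BUG_ON(): the build breaks if the
 * computed fixmap size ever stops fitting in the reserved window. */
_Static_assert(__FIXADDR_SIZE <= FIXADDR_SIZE,
               "fixmap no longer fits in the reserved FIXADDR_SIZE window");

int main(void)
{
        printf("fixmap uses %lu of %lu reserved bytes\n", __FIXADDR_SIZE, FIXADDR_SIZE);
        return 0;
}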
Fixes: 265c3491c4bc ("powerpc: Add support for GENERIC_EARLY_IOREMAP") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0d51620eacf036d683d1a3c41328f69adb601dc0.1618925560.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 +++- arch/powerpc/include/asm/fixmap.h | 9 +++++++++ arch/powerpc/include/asm/nohash/64/pgtable.h | 5 ++++- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 0c89977ec10bf0..a666d561b44d28 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -7,6 +7,7 @@ #ifndef __ASSEMBLY__ #include #include +#include #endif /* @@ -324,7 +325,8 @@ extern unsigned long pci_io_base; #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) #define IOREMAP_BASE (PHB_IO_END) #define IOREMAP_START (ioremap_bot) -#define IOREMAP_END (KERN_IO_END) +#define IOREMAP_END (KERN_IO_END - FIXADDR_SIZE) +#define FIXADDR_SIZE SZ_32M /* Advertise special mapping type for AGP */ #define HAVE_PAGE_AGP diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 8d03c16a366354..947b5b9c442411 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -23,12 +23,17 @@ #include #endif +#ifdef CONFIG_PPC64 +#define FIXADDR_TOP (IOREMAP_END + FIXADDR_SIZE) +#else +#define FIXADDR_SIZE 0 #ifdef CONFIG_KASAN #include #define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE) #else #define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) #endif +#endif /* * Here we define all the compile-time 'special' virtual @@ -50,6 +55,7 @@ */ enum fixed_addresses { FIX_HOLE, +#ifdef CONFIG_PPC32 /* reserve the top 128K for early debugging purposes */ FIX_EARLY_DEBUG_TOP = FIX_HOLE, FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1, @@ -72,6 +78,7 @@ enum fixed_addresses { FIX_IMMR_SIZE, #endif /* FIX_PCIE_MCFG, */ +#endif /* CONFIG_PPC32 */ __end_of_permanent_fixed_addresses, #define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE) @@ -98,6 +105,8 @@ enum fixed_addresses { static inline void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { + BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && __FIXADDR_SIZE > FIXADDR_SIZE); + if (__builtin_constant_p(idx)) BUILD_BUG_ON(idx >= __end_of_fixed_addresses); else if (WARN_ON(idx >= __end_of_fixed_addresses)) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 6cb8aa35719176..57cd3892bfe052 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -6,6 +6,8 @@ * the ppc64 non-hashed page table. */ +#include + #include #include #include @@ -54,7 +56,8 @@ #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) #define IOREMAP_BASE (PHB_IO_END) #define IOREMAP_START (ioremap_bot) -#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE) +#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE - FIXADDR_SIZE) +#define FIXADDR_SIZE SZ_32M /* From 0bd3f9e953bd3636e73d296e9bed11a25c09c118 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Apr 2021 13:32:49 +0000 Subject: [PATCH 278/302] powerpc/legacy_serial: Use early_ioremap() [ 0.000000] ioremap() called early from find_legacy_serial_ports+0x3cc/0x474. Use early_ioremap() instead find_legacy_serial_ports() is called early from setup_arch(), before paging_init(). 
vmalloc is not available yet, ioremap shouldn't be used that early. Use early_ioremap() and switch to a regular ioremap() later. Signed-off-by: Christophe Leroy Signed-off-by: Christophe Leroy Tested-by: Chris Packham Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/103ed8ee9e5973c958ec1da2d0b0764f69395d01.1618925560.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/legacy_serial.c | 33 +++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index f061e06e9f519a..8b2c1a8553a0ed 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -15,6 +15,7 @@ #include #include #include +#include #undef DEBUG @@ -34,6 +35,7 @@ static struct legacy_serial_info { unsigned int clock; int irq_check_parent; phys_addr_t taddr; + void __iomem *early_addr; } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS]; static const struct of_device_id legacy_serial_parents[] __initconst = { @@ -325,17 +327,16 @@ static void __init setup_legacy_serial_console(int console) { struct legacy_serial_info *info = &legacy_serial_infos[console]; struct plat_serial8250_port *port = &legacy_serial_ports[console]; - void __iomem *addr; unsigned int stride; stride = 1 << port->regshift; /* Check if a translated MMIO address has been found */ if (info->taddr) { - addr = ioremap(info->taddr, 0x1000); - if (addr == NULL) + info->early_addr = early_ioremap(info->taddr, 0x1000); + if (info->early_addr == NULL) return; - udbg_uart_init_mmio(addr, stride); + udbg_uart_init_mmio(info->early_addr, stride); } else { /* Check if it's PIO and we support untranslated PIO */ if (port->iotype == UPIO_PORT && isa_io_special) @@ -353,6 +354,30 @@ static void __init setup_legacy_serial_console(int console) udbg_uart_setup(info->speed, info->clock); } +static int __init ioremap_legacy_serial_console(void) +{ + struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console]; + struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console]; + void __iomem *vaddr; + + if (legacy_serial_console < 0) + return 0; + + if (!info->early_addr) + return 0; + + vaddr = ioremap(info->taddr, 0x1000); + if (WARN_ON(!vaddr)) + return -ENOMEM; + + udbg_uart_init_mmio(vaddr, 1 << port->regshift); + early_iounmap(info->early_addr, 0x1000); + info->early_addr = NULL; + + return 0; +} +early_initcall(ioremap_legacy_serial_console); + /* * This is called very early, as part of setup_system() or eventually * setup_arch(), basically before anything else in this file. This function From b4ded42268ee3d703da208278342b9901abe145a Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 4 Mar 2021 06:55:37 -0500 Subject: [PATCH 279/302] powerpc/perf: Fix sampled instruction type for larx/stcx Sampled Instruction Event Register (SIER) field [46:48] identifies the sampled instruction type. ISA v3.1 says value of 0b111 for this field as reserved, but in POWER10 it denotes LARX/STCX type which will hopefully be fixed in ISA v3.1 update. Patch fixes the functions to handle type value 7 for CPU_FTR_ARCH_31. 
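Stated as plain logic (an illustrative re-statement for this write-up, not the kernel code), the decision isa207_get_mem_data_src() ends up making is roughly:

#include <stdio.h>

enum mem_op { OP_NA, OP_LOAD, OP_STORE };

/* 'sier_type' is the already-extracted SIER[46:48] field and 'mmcra_elig'
 * the MMCRA sampling eligibility bits [57:59]; 'isa_v31' stands for
 * cpu_has_feature(CPU_FTR_ARCH_31). */
static enum mem_op classify_sample(unsigned int sier_type, unsigned int mmcra_elig, int isa_v31)
{
        if (sier_type == 1)
                return OP_LOAD;
        if (sier_type == 2)
                return OP_STORE;
        if (sier_type == 7 && isa_v31) {        /* larx/stcx on Power10 */
                if (mmcra_elig == 5)
                        return OP_LOAD;
                if (mmcra_elig == 7)
                        return OP_STORE;
        }
        return OP_NA;
}

int main(void)
{
        printf("%d\n", classify_sample(7, 5, 1));       /* 1 == OP_LOAD */
        return 0;
}

The same reasoning is why the weight calculation below stops treating type 7 as "no latency" when CPU_FTR_ARCH_31 is set.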
Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support") Signed-off-by: Athira Rajeev Reviewed-by: Madhavan Srinivasan [mpe: Avoid reading mmcra until necessary, use early return to deindent if block] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1614858937-1485-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/isa207-common.c | 38 +++++++++++++++++++++++++++---- arch/powerpc/perf/isa207-common.h | 1 + 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 4e71a76c7734d8..f92bf5f6b74f14 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -275,11 +275,39 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, sier = mfspr(SPRN_SIER); val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT; - if (val == 1 || val == 2) { - idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT; - sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT; + if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31))) + return; + + idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT; + sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT; + + dsrc->val = isa207_find_source(idx, sub_idx); + if (val == 7) { + u64 mmcra; + u32 op_type; + + /* + * Type 0b111 denotes either larx or stcx instruction. Use the + * MMCRA sampling bits [57:59] along with the type value + * to determine the exact instruction type. If the sampling + * criteria is neither load or store, set the type as default + * to NA. + */ + mmcra = mfspr(SPRN_MMCRA); - dsrc->val = isa207_find_source(idx, sub_idx); + op_type = (mmcra >> MMCRA_SAMP_ELIG_SHIFT) & MMCRA_SAMP_ELIG_MASK; + switch (op_type) { + case 5: + dsrc->val |= P(OP, LOAD); + break; + case 7: + dsrc->val |= P(OP, STORE); + break; + default: + dsrc->val |= P(OP, NA); + break; + } + } else { dsrc->val |= (val == 1) ? P(OP, LOAD) : P(OP, STORE); } } @@ -297,7 +325,7 @@ void isa207_get_mem_weight(u64 *weight, u64 type) if (cpu_has_feature(CPU_FTR_ARCH_31)) mantissa = P10_MMCRA_THR_CTR_MANT(mmcra); - if (val == 0 || val == 7) + if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31))) weight_lat = 0; else weight_lat = mantissa << (2 * exp); diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index ae8d44e325c7e3..4a2cbc3dc047b8 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -220,6 +220,7 @@ /* Bits in MMCRA for PowerISA v2.07 */ #define MMCRA_SAMP_MODE_SHIFT 1 #define MMCRA_SAMP_ELIG_SHIFT 4 +#define MMCRA_SAMP_ELIG_MASK 7 #define MMCRA_THR_CTL_SHIFT 8 #define MMCRA_THR_SEL_SHIFT 16 #define MMCRA_THR_CMP_SHIFT 32 From 66d9b7492887d34c711bc05b36c22438acba51b4 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 4 Mar 2021 01:40:15 -0500 Subject: [PATCH 280/302] powerpc/perf: Fix the threshold event selection for memory events in power10 Memory events (mem-loads and mem-stores) currently use the threshold event selection as issue to finish. Power10 supports issue to complete as part of thresholding which is more appropriate for mem-loads and mem-stores. Hence fix the event code for memory events to use issue to complete. 
Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support") Signed-off-by: Athira Rajeev Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1614840015-1535-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/power10-events-list.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h index e45dafe818ed41..93be7197d2502a 100644 --- a/arch/powerpc/perf/power10-events-list.h +++ b/arch/powerpc/perf/power10-events-list.h @@ -75,5 +75,5 @@ EVENT(PM_RUN_INST_CMPL_ALT, 0x00002); * thresh end (TE) */ -EVENT(MEM_LOADS, 0x34340401e0); -EVENT(MEM_STORES, 0x343c0401e0); +EVENT(MEM_LOADS, 0x35340401e0); +EVENT(MEM_STORES, 0x353c0401e0); From 0f197ddce403af33aa7f15af55644549778a9988 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 22 Apr 2021 01:17:32 +1000 Subject: [PATCH 281/302] powerpc/64s: Fix mm_cpumask memory ordering comment The memory ordering comment no longer applies, because mm_ctx_id is no longer used anywhere. At best always been difficult to follow. It's better to consider the load on which the slbmte depends on, which the MMU depends on before it can start loading TLBs, rather than a store which may or may not have a subsequent dependency chain to the slbmte. So update the comment and we use the load of the mm's user context ID. This is much more analogous the radix ordering too, which is good. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210421151733.212858-1-npiggin@gmail.com --- arch/powerpc/mm/mmu_context.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 18f20da0d3483f..a857af401738fd 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -43,24 +43,26 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, /* * This full barrier orders the store to the cpumask above vs - * a subsequent operation which allows this CPU to begin loading - * translations for next. + * a subsequent load which allows this CPU/MMU to begin loading + * translations for 'next' from page table PTEs into the TLB. * - * When using the radix MMU that operation is the load of the + * When using the radix MMU, that operation is the load of the * MMU context id, which is then moved to SPRN_PID. * * For the hash MMU it is either the first load from slb_cache - * in switch_slb(), and/or the store of paca->mm_ctx_id in - * copy_mm_to_paca(). + * in switch_slb() to preload the SLBs, or the load of + * get_user_context which loads the context for the VSID hash + * to insert a new SLB, in the SLB fault handler. * * On the other side, the barrier is in mm/tlb-radix.c for - * radix which orders earlier stores to clear the PTEs vs - * the load of mm_cpumask. And pte_xchg which does the same - * thing for hash. + * radix which orders earlier stores to clear the PTEs before + * the load of mm_cpumask to check which CPU TLBs should be + * flushed. For hash, pte_xchg to clear the PTE includes the + * barrier. * - * This full barrier is needed by membarrier when switching - * between processes after store to rq->curr, before user-space - * memory accesses. + * This full barrier is also needed by membarrier when + * switching between processes after store to rq->curr, before + * user-space memory accesses. 
*/ smp_mb(); From 8a87a507714386efc39c3ae6fa24d4f79846b522 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 21 Apr 2021 17:24:03 +0000 Subject: [PATCH 282/302] powerpc/52xx: Fix an invalid ASM expression ('addi' used instead of 'add') AS arch/powerpc/platforms/52xx/lite5200_sleep.o arch/powerpc/platforms/52xx/lite5200_sleep.S: Assembler messages: arch/powerpc/platforms/52xx/lite5200_sleep.S:184: Warning: invalid register expression In the following code, 'addi' is wrong, has to be 'add' /* local udelay in sram is needed */ udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */ mullw r12, r12, r11 mftb r13 /* start */ addi r12, r13, r12 /* end */ Fixes: ee983079ce04 ("[POWERPC] MPC5200 low power mode") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cb4cec9131c8577803367f1699209a7e104cec2a.1619025821.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/52xx/lite5200_sleep.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index 11475c58ea4319..afee8b1515a8e6 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -181,7 +181,7 @@ sram_code: udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */ mullw r12, r12, r11 mftb r13 /* start */ - addi r12, r13, r12 /* end */ + add r12, r13, r12 /* end */ 1: mftb r13 /* current */ cmp cr0, r13, r12 From da650ada100956b0f00aa4fe9ce33103378ce9ca Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 25 Feb 2021 17:19:49 +1100 Subject: [PATCH 283/302] selftests/powerpc: Add uaccess flush test Also based on the RFI and entry flush tests, it counts the L1D misses by doing a syscall that does user access: uname, in this case. Signed-off-by: Thadeu Lima de Souza Cascardo [dja: forward port, rename function] Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210225061949.1213404-1-dja@axtens.net --- .../selftests/powerpc/security/Makefile | 3 +- .../selftests/powerpc/security/flush_utils.c | 13 ++ .../selftests/powerpc/security/flush_utils.h | 3 + .../powerpc/security/uaccess_flush.c | 158 ++++++++++++++++++ 4 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/security/uaccess_flush.c diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index f25e854fe3709a..844d18cd5f9303 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ -TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2 +TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2 top_srcdir = ../../../../.. 
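The measurement itself boils down to wrapping a user-access syscall with a perf L1D read-miss counter; a rough stand-alone sketch of that core idea (not the selftest, which also flips the debugfs knobs, touches a cacheline-strided buffer each iteration and compares against an expected miss count) might look like:

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/utsname.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

/* Count L1D read misses, the same event class the selftest uses. */
static int open_l1d_read_miss_counter(void)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HW_CACHE;
        attr.config = PERF_COUNT_HW_CACHE_L1D |
                      (PERF_COUNT_HW_CACHE_OP_READ << 8) |
                      (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
        attr.disabled = 1;
        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

int main(void)
{
        struct utsname u;
        long long misses = 0;
        int fd = open_l1d_read_miss_counter();

        if (fd < 0)
                return 1;
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        uname(&u);                      /* syscall that performs a user access */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
        read(fd, &misses, sizeof(misses));
        printf("L1D read misses around uname(): %lld\n", misses);
        close(fd);
        return 0;
}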
CFLAGS += -I../../../../../usr/include @@ -13,3 +13,4 @@ $(OUTPUT)/spectre_v2: CFLAGS += -m64 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S $(OUTPUT)/rfi_flush: flush_utils.c $(OUTPUT)/entry_flush: flush_utils.c +$(OUTPUT)/uaccess_flush: flush_utils.c diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c index 0c3c4c40c7fbbc..4d95965cb751f3 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.c +++ b/tools/testing/selftests/powerpc/security/flush_utils.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "utils.h" #include "flush_utils.h" @@ -35,6 +36,18 @@ void syscall_loop(char *p, unsigned long iterations, } } +void syscall_loop_uaccess(char *p, unsigned long iterations, + unsigned long zero_size) +{ + struct utsname utsname; + + for (unsigned long i = 0; i < iterations; i++) { + for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) + load(p + j); + uname(&utsname); + } +} + static void sigill_handler(int signr, siginfo_t *info, void *unused) { static int warned; diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h index 7a3d60292916ec..e1e68281f7ac2e 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.h +++ b/tools/testing/selftests/powerpc/security/flush_utils.h @@ -16,6 +16,9 @@ void syscall_loop(char *p, unsigned long iterations, unsigned long zero_size); +void syscall_loop_uaccess(char *p, unsigned long iterations, + unsigned long zero_size); + void set_dscr(unsigned long val); #endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */ diff --git a/tools/testing/selftests/powerpc/security/uaccess_flush.c b/tools/testing/selftests/powerpc/security/uaccess_flush.c new file mode 100644 index 00000000000000..cf80f960e38a45 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/uaccess_flush.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018 IBM Corporation. + * Copyright 2020 Canonical Ltd. 
+ */ + +#define __SANE_USERSPACE_TYPES__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" +#include "flush_utils.h" + +int uaccess_flush_test(void) +{ + char *p; + int repetitions = 10; + int fd, passes = 0, iter, rc = 0; + struct perf_event_read v; + __u64 l1d_misses_total = 0; + unsigned long iterations = 100000, zero_size = 24 * 1024; + unsigned long l1d_misses_expected; + int rfi_flush_orig; + int entry_flush_orig; + int uaccess_flush, uaccess_flush_orig; + + SKIP_IF(geteuid() != 0); + + // The PMU event we use only works on Power7 or later + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { + perror("Unable to read powerpc/rfi_flush debugfs file"); + SKIP_IF(1); + } + + if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + perror("Unable to read powerpc/entry_flush debugfs file"); + SKIP_IF(1); + } + + if (read_debugfs_file("powerpc/uaccess_flush", &uaccess_flush_orig) < 0) { + perror("Unable to read powerpc/entry_flush debugfs file"); + SKIP_IF(1); + } + + if (rfi_flush_orig != 0) { + if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) { + perror("error writing to powerpc/rfi_flush debugfs file"); + FAIL_IF(1); + } + } + + if (entry_flush_orig != 0) { + if (write_debugfs_file("powerpc/entry_flush", 0) < 0) { + perror("error writing to powerpc/entry_flush debugfs file"); + FAIL_IF(1); + } + } + + uaccess_flush = uaccess_flush_orig; + + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); + FAIL_IF(fd < 0); + + p = (char *)memalign(zero_size, CACHELINE_SIZE); + + FAIL_IF(perf_event_enable(fd)); + + // disable L1 prefetching + set_dscr(1); + + iter = repetitions; + + /* + * We expect to see l1d miss for each cacheline access when entry_flush + * is set. Allow a small variation on this. + */ + l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2); + +again: + FAIL_IF(perf_event_reset(fd)); + + syscall_loop_uaccess(p, iterations, zero_size); + + FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v)); + + if (uaccess_flush && v.l1d_misses >= l1d_misses_expected) + passes++; + else if (!uaccess_flush && v.l1d_misses < (l1d_misses_expected / 2)) + passes++; + + l1d_misses_total += v.l1d_misses; + + while (--iter) + goto again; + + if (passes < repetitions) { + printf("FAIL (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d failures]\n", + uaccess_flush, l1d_misses_total, uaccess_flush ? '<' : '>', + uaccess_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + repetitions - passes, repetitions); + rc = 1; + } else { + printf("PASS (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d pass]\n", + uaccess_flush, l1d_misses_total, uaccess_flush ? '>' : '<', + uaccess_flush ? 
repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + passes, repetitions); + } + + if (uaccess_flush == uaccess_flush_orig) { + uaccess_flush = !uaccess_flush_orig; + if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush) < 0) { + perror("error writing to powerpc/uaccess_flush debugfs file"); + return 1; + } + iter = repetitions; + l1d_misses_total = 0; + passes = 0; + goto again; + } + + perf_event_disable(fd); + close(fd); + + set_dscr(0); + + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { + perror("unable to restore original value of powerpc/rfi_flush debugfs file"); + return 1; + } + + if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + perror("unable to restore original value of powerpc/entry_flush debugfs file"); + return 1; + } + + if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush_orig) < 0) { + perror("unable to restore original value of powerpc/uaccess_flush debugfs file"); + return 1; + } + + return rc; +} + +int main(int argc, char *argv[]) +{ + return test_harness(uaccess_flush_test, "uaccess_flush_test"); +} From 421a7483878cf3f356ebb871effe81997a45dda7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 2 Mar 2021 13:09:54 +1100 Subject: [PATCH 284/302] powerpc/configs: Add IBMVNIC to some 64-bit configs This is an IBM specific driver that we should enable to get some build/boot testing. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210302020954.2980046-1-mpe@ellerman.id.au --- arch/powerpc/configs/ppc64_defconfig | 1 + arch/powerpc/configs/pseries_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 72b235ef6f3b2b..701811c91a6f3f 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -178,6 +178,7 @@ CONFIG_CHELSIO_T1=m CONFIG_BE2NET=m CONFIG_IBMVETH=m CONFIG_EHEA=m +CONFIG_IBMVNIC=m CONFIG_E100=y CONFIG_E1000=y CONFIG_E1000E=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 968095d7682c59..50168dde4ea598 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -160,6 +160,7 @@ CONFIG_BE2NET=m CONFIG_S2IO=m CONFIG_IBMVETH=y CONFIG_EHEA=y +CONFIG_IBMVNIC=y CONFIG_E100=y CONFIG_E1000=y CONFIG_E1000E=y From dae4ff8031b49af4721101d6298fc14cb9c16a4c Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 12 Apr 2021 16:52:15 +0530 Subject: [PATCH 285/302] powerpc/selftests/ptrace-hwbreak: Add testcases for 2nd DAWR Message-ID: <20210412112218.128183-2-ravi.bangoria@linux.ibm.com> (raw) Add selftests to test multiple active DAWRs with ptrace interface. Sample o/p: $ ./ptrace-hwbreak ... 
PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO, len: 6: Ok PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO, len: 6: Ok PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO, len: 6: Ok PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO, len: 6: Ok Signed-off-by: Ravi Bangoria Reviewed-by: Daniel Axtens [mpe: Fix build on older distros] Signed-off-by: Michael Ellerman --- .../selftests/powerpc/ptrace/perf-hwbreak.c | 4 + .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 79 +++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index c1f324afdbf373..9ccf252d4b13fd 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -30,6 +30,10 @@ #include #include "utils.h" +#ifndef PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 +#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20 +#endif + #define MAX_LOOPS 10000 #define DAWR_LENGTH_MAX ((0x3f + 1) * 8) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index 2e0d86e0687e15..a0635a3819aa42 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -194,6 +194,18 @@ static void test_workload(void) big_var[rand() % DAWR_MAX_LEN] = 'a'; else cvar = big_var[rand() % DAWR_MAX_LEN]; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */ + gstruct.a[rand() % A_LEN] = 'a'; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */ + cvar = gstruct.b[rand() % B_LEN]; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */ + gstruct.a[rand() % A_LEN] = 'a'; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */ + cvar = gstruct.a[rand() % A_LEN]; } static void check_success(pid_t child_pid, const char *name, const char *type, @@ -417,6 +429,69 @@ static void test_sethwdebug_range_aligned(pid_t child_pid) ptrace_delhwdebug(child_pid, wh); } +static void test_multi_sethwdebug_range(pid_t child_pid) +{ + struct ppc_hw_breakpoint info1, info2; + unsigned long wp_addr1, wp_addr2; + char *name1 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED"; + char *name2 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED"; + int len1, len2; + int wh1, wh2; + + wp_addr1 = (unsigned long)&gstruct.a; + wp_addr2 = (unsigned long)&gstruct.b; + len1 = A_LEN; + len2 = B_LEN; + get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1); + get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */ + wh1 = ptrace_sethwdebug(child_pid, &info1); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */ + wh2 = ptrace_sethwdebug(child_pid, &info2); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name1, "WO", wp_addr1, len1); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name2, "RO", wp_addr2, len2); + + ptrace_delhwdebug(child_pid, wh1); + ptrace_delhwdebug(child_pid, wh2); +} + +static void test_multi_sethwdebug_range_dawr_overlap(pid_t child_pid) +{ + struct ppc_hw_breakpoint info1, info2; + unsigned long wp_addr1, wp_addr2; + char *name = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap"; + int len1, len2; + int wh1, wh2; + + wp_addr1 = (unsigned long)&gstruct.a; + wp_addr2 = (unsigned long)&gstruct.a; + len1 = A_LEN; + len2 = A_LEN; + 
get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1); + get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */ + wh1 = ptrace_sethwdebug(child_pid, &info1); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */ + wh2 = ptrace_sethwdebug(child_pid, &info2); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "WO", wp_addr1, len1); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RO", wp_addr2, len2); + + ptrace_delhwdebug(child_pid, wh1); + ptrace_delhwdebug(child_pid, wh2); +} + static void test_sethwdebug_range_unaligned(pid_t child_pid) { struct ppc_hw_breakpoint info; @@ -504,6 +579,10 @@ run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr) test_sethwdebug_range_unaligned(child_pid); test_sethwdebug_range_unaligned_dar(child_pid); test_sethwdebug_dawr_max_range(child_pid); + if (dbginfo->num_data_bps > 1) { + test_multi_sethwdebug_range(child_pid); + test_multi_sethwdebug_range_dawr_overlap(child_pid); + } } } } From c9cb0afb4eaa03801322f48dad4093979ff45e88 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 12 Apr 2021 16:52:16 +0530 Subject: [PATCH 286/302] powerpc/selftests/perf-hwbreak: Coalesce event creation code perf-hwbreak selftest opens hw-breakpoint event at multiple places for which it has same code repeated. Coalesce that code into a function. Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412112218.128183-3-ravi.bangoria@linux.ibm.com --- .../selftests/powerpc/ptrace/perf-hwbreak.c | 79 +++++++++---------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index 9ccf252d4b13fd..8c54adaa40b11b 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -38,28 +38,46 @@ #define DAWR_LENGTH_MAX ((0x3f + 1) * 8) -static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, - int cpu, int group_fd, - unsigned long flags) +static void perf_event_attr_set(struct perf_event_attr *attr, + __u32 type, __u64 addr, __u64 len, + bool exclude_user) { - attr->size = sizeof(*attr); - return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = type; + attr->bp_addr = addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; + attr->exclude_guest = 1; + attr->exclude_user = exclude_user; + attr->disabled = 1; } -static inline bool breakpoint_test(int len) +static int +perf_process_event_open_exclude_user(__u32 type, __u64 addr, __u64 len, bool exclude_user) { struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, exclude_user); + return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0); +} + +static int perf_process_event_open(__u32 type, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, 0); + return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0); +} + +static inline bool breakpoint_test(int len) +{ int fd; - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.bp_type = HW_BREAKPOINT_R; /* 
bp_addr can point anywhere but needs to be aligned */ - attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800; - attr.bp_len = len; - fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + fd = perf_process_event_open(HW_BREAKPOINT_R, (__u64)(&fd) & 0xfffffffffffff800, len); if (fd < 0) return false; close(fd); @@ -79,7 +97,6 @@ static inline bool dawr_supported(void) static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) { int i,j; - struct perf_event_attr attr; size_t res; unsigned long long breaks, needed; int readint; @@ -89,6 +106,7 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) int break_fd; int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */ volatile int *k; + __u64 len; /* align to 0x400 boundary as required by DAWR */ readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) & @@ -98,19 +116,11 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) if (arraytest) ptr = &readintalign[0]; - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.bp_type = readwriteflag; - attr.bp_addr = (__u64)ptr; - attr.bp_len = sizeof(int); - if (arraytest) - attr.bp_len = DAWR_LENGTH_MAX; - attr.exclude_user = exclude_user; - break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + len = arraytest ? DAWR_LENGTH_MAX : sizeof(int); + break_fd = perf_process_event_open_exclude_user(readwriteflag, (__u64)ptr, + len, exclude_user); if (break_fd < 0) { - perror("sys_perf_event_open"); + perror("perf_process_event_open_exclude_user"); exit(1); } @@ -157,7 +167,6 @@ static int runtest_dar_outside(void) void *target; volatile __u16 temp16; volatile __u64 temp64; - struct perf_event_attr attr; int break_fd; unsigned long long breaks; int fail = 0; @@ -169,21 +178,11 @@ static int runtest_dar_outside(void) exit(EXIT_FAILURE); } - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.exclude_kernel = 1; - attr.exclude_hv = 1; - attr.exclude_guest = 1; - attr.bp_type = HW_BREAKPOINT_RW; /* watch middle half of target array */ - attr.bp_addr = (__u64)(target + 2); - attr.bp_len = 4; - break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + break_fd = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)(target + 2), 4); if (break_fd < 0) { free(target); - perror("sys_perf_event_open"); + perror("perf_process_event_open"); exit(EXIT_FAILURE); } From c65c64cc7bbd273121edf96a7a5a0269038ab454 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 12 Apr 2021 16:52:17 +0530 Subject: [PATCH 287/302] powerpc/selftests/perf-hwbreak: Add testcases for 2nd DAWR Extend perf-hwbreak.c selftest to test multiple DAWRs. Also add testcase for testing 512 byte boundary removal. Sample o/p: # ./perf-hwbreak ... 
TESTED: Process specific, Two events, diff addr TESTED: Process specific, Two events, same addr TESTED: Process specific, Two events, diff addr, one is RO, other is WO TESTED: Process specific, Two events, same addr, one is RO, other is WO TESTED: Systemwide, Two events, diff addr TESTED: Systemwide, Two events, same addr TESTED: Systemwide, Two events, diff addr, one is RO, other is WO TESTED: Systemwide, Two events, same addr, one is RO, other is WO TESTED: Process specific, 512 bytes, unaligned success: perf_hwbreak Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412112218.128183-4-ravi.bangoria@linux.ibm.com --- .../selftests/powerpc/ptrace/perf-hwbreak.c | 552 +++++++++++++++++- 1 file changed, 551 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index 8c54adaa40b11b..ecde2c199f3b18 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -21,8 +21,13 @@ #include #include #include +#include #include #include +#include +#include +#include +#include #include #include #include @@ -38,6 +43,12 @@ #define DAWR_LENGTH_MAX ((0x3f + 1) * 8) +int nprocs; + +static volatile int a = 10; +static volatile int b = 10; +static volatile char c[512 + 8] __attribute__((aligned(512))); + static void perf_event_attr_set(struct perf_event_attr *attr, __u32 type, __u64 addr, __u64 len, bool exclude_user) @@ -72,6 +83,76 @@ static int perf_process_event_open(__u32 type, __u64 addr, __u64 len) return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0); } +static int perf_cpu_event_open(long cpu, __u32 type, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, 0); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static void close_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + close(fd[i]); +} + +static unsigned long read_fds(int *fd, int n) +{ + int i; + unsigned long c = 0; + unsigned long count = 0; + size_t res; + + for (i = 0; i < n; i++) { + res = read(fd[i], &c, sizeof(c)); + assert(res == sizeof(unsigned long long)); + count += c; + } + return count; +} + +static void reset_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_RESET); +} + +static void enable_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_ENABLE); +} + +static void disable_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_DISABLE); +} + +static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len) +{ + int i = 0; + + /* Assume online processors are 0 to nprocs for simplisity */ + for (i = 0; i < nprocs; i++) { + fd[i] = perf_cpu_event_open(i, type, addr, len); + if (fd[i] < 0) { + close_fds(fd, i); + return fd[i]; + } + } + return 0; +} + static inline bool breakpoint_test(int len) { int fd; @@ -266,11 +347,467 @@ static int runtest_dar_outside(void) return fail; } +static void multi_dawr_workload(void) +{ + a += 10; + b += 10; + c[512 + 1] += 'a'; +} + +static int test_process_multi_diff_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, diff addr"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + 
exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_same_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, same addr"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_diff_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, diff addr, one is RO, other is WO"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_same_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, same addr, one is RO, other is WO"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + 
perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_diff_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, diff addr"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_same_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, same addr"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_diff_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, diff addr, one is RO, other is WO"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + 
perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_same_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, same addr, one is RO, other is WO"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int runtest_multi_dawr(void) +{ + int ret = 0; + + ret |= test_process_multi_diff_addr(); + ret |= test_process_multi_same_addr(); + ret |= test_process_multi_diff_addr_ro_wo(); + ret |= test_process_multi_same_addr_ro_wo(); + ret |= test_syswide_multi_diff_addr(); + ret |= test_syswide_multi_same_addr(); + ret |= test_syswide_multi_diff_addr_ro_wo(); + ret |= test_syswide_multi_same_addr_ro_wo(); + + return ret; +} + +static int runtest_unaligned_512bytes(void) +{ + unsigned long long breaks = 0; + int fd; + char *desc = "Process specific, 512 bytes, unaligned"; + __u64 addr = (__u64)&c + 8; + size_t res; + + fd = perf_process_event_open(HW_BREAKPOINT_RW, addr, 512); + if (fd < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd, PERF_EVENT_IOC_RESET); + ioctl(fd, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd, PERF_EVENT_IOC_DISABLE); + + res = read(fd, &breaks, sizeof(breaks)); + assert(res == sizeof(unsigned long long)); + + close(fd); + + if (breaks != 2) { + printf("FAILED: %s: %lld != 2\n", desc, breaks); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +/* There is no perf api to find number of available watchpoints. Use ptrace. 
*/ +static int get_nr_wps(bool *arch_31) +{ + struct ppc_debug_info dbginfo; + int child_pid; + + child_pid = fork(); + if (!child_pid) { + int ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + perror("PTRACE_TRACEME failed\n"); + exit(EXIT_FAILURE); + } + kill(getpid(), SIGUSR1); + + sleep(1); + exit(EXIT_SUCCESS); + } + + wait(NULL); + if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo)) { + perror("Can't get breakpoint info"); + exit(EXIT_FAILURE); + } + + *arch_31 = !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_ARCH_31); + return dbginfo.num_data_bps; +} + static int runtest(void) { int rwflag; int exclude_user; int ret; + bool dawr = dawr_supported(); + bool arch_31 = false; + int nr_wps = get_nr_wps(&arch_31); /* * perf defines rwflag as two bits read and write and at least @@ -283,7 +820,7 @@ static int runtest(void) return ret; /* if we have the dawr, we can do an array test */ - if (!dawr_supported()) + if (!dawr) continue; ret = runtestsingle(rwflag, exclude_user, 1); if (ret) @@ -292,6 +829,19 @@ static int runtest(void) } ret = runtest_dar_outside(); + if (ret) + return ret; + + if (dawr && nr_wps > 1) { + nprocs = get_nprocs(); + ret = runtest_multi_dawr(); + if (ret) + return ret; + } + + if (dawr && arch_31) + ret = runtest_unaligned_512bytes(); + return ret; } From 290f7d8ce2b1eea5413bb120e0d9d610675b7fba Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 12 Apr 2021 16:52:18 +0530 Subject: [PATCH 288/302] powerpc/selftests: Add selftest to test concurrent perf/ptrace events ptrace and perf watchpoints can't co-exists if their address range overlaps. See commit 29da4f91c0c1 ("powerpc/watchpoint: Don't allow concurrent perf and ptrace events") for more detail. Add selftest for the same. Sample o/p: # ./ptrace-perf-hwbreak test: ptrace-perf-hwbreak tags: git_version:powerpc-5.8-7-118-g937fa174a15d-dirty perf cpu event -> ptrace thread event (Overlapping): Ok perf cpu event -> ptrace thread event (Non-overlapping): Ok perf thread event -> ptrace same thread event (Overlapping): Ok perf thread event -> ptrace same thread event (Non-overlapping): Ok perf thread event -> ptrace other thread event: Ok ptrace thread event -> perf kernel event: Ok ptrace thread event -> perf same thread event (Overlapping): Ok ptrace thread event -> perf same thread event (Non-overlapping): Ok ptrace thread event -> perf other thread event: Ok ptrace thread event -> perf cpu event (Overlapping): Ok ptrace thread event -> perf cpu event (Non-overlapping): Ok ptrace thread event -> perf same thread & cpu event (Overlapping): Ok ptrace thread event -> perf same thread & cpu event (Non-overlapping): Ok ptrace thread event -> perf other thread & cpu event: Ok success: ptrace-perf-hwbreak Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210412112218.128183-5-ravi.bangoria@linux.ibm.com --- .../selftests/powerpc/ptrace/.gitignore | 1 + .../testing/selftests/powerpc/ptrace/Makefile | 2 +- .../powerpc/ptrace/ptrace-perf-hwbreak.c | 659 ++++++++++++++++++ 3 files changed, 661 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore index 0e96150b7c7e92..eb75e5360e312f 100644 --- a/tools/testing/selftests/powerpc/ptrace/.gitignore +++ b/tools/testing/selftests/powerpc/ptrace/.gitignore @@ -14,3 +14,4 @@ perf-hwbreak core-pkey ptrace-pkey ptrace-syscall +ptrace-perf-hwbreak 
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 8d3f006c98cc39..a500639da97ab4 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -2,7 +2,7 @@ TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ - perf-hwbreak ptrace-syscall + perf-hwbreak ptrace-syscall ptrace-perf-hwbreak top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c new file mode 100644 index 00000000000000..3344e74a97b4f8 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c @@ -0,0 +1,659 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ptrace.h" + +char data[16]; + +/* Overlapping address range */ +volatile __u64 *ptrace_data1 = (__u64 *)&data[0]; +volatile __u64 *perf_data1 = (__u64 *)&data[4]; + +/* Non-overlapping address range */ +volatile __u64 *ptrace_data2 = (__u64 *)&data[0]; +volatile __u64 *perf_data2 = (__u64 *)&data[8]; + +static unsigned long pid_max_addr(void) +{ + FILE *fp; + char *line, *c; + char addr[100]; + size_t len = 0; + + fp = fopen("/proc/kallsyms", "r"); + if (!fp) { + printf("Failed to read /proc/kallsyms. Exiting..\n"); + exit(EXIT_FAILURE); + } + + while (getline(&line, &len, fp) != -1) { + if (!strstr(line, "pid_max") || strstr(line, "pid_max_max") || + strstr(line, "pid_max_min")) + continue; + + strncpy(addr, line, len < 100 ? len : 100); + c = strchr(addr, ' '); + *c = '\0'; + return strtoul(addr, &c, 16); + } + fclose(fp); + printf("Could not find pix_max. 
Exiting..\n"); + exit(EXIT_FAILURE); + return -1; +} + +static void perf_user_event_attr_set(struct perf_event_attr *attr, __u64 addr, __u64 len) +{ + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; +} + +static void perf_kernel_event_attr_set(struct perf_event_attr *attr) +{ + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = pid_max_addr(); + attr->bp_len = sizeof(unsigned long); + attr->exclude_user = 1; + attr->exclude_hv = 1; +} + +static int perf_cpu_event_open(int cpu, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static int perf_thread_event_open(pid_t child_pid, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); +} + +static int perf_thread_cpu_event_open(pid_t child_pid, int cpu, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, child_pid, cpu, -1, 0); +} + +static int perf_thread_kernel_event_open(pid_t child_pid) +{ + struct perf_event_attr attr; + + perf_kernel_event_attr_set(&attr); + return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); +} + +static int perf_cpu_kernel_event_open(int cpu) +{ + struct perf_event_attr attr; + + perf_kernel_event_attr_set(&attr); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static int child(void) +{ + int ret; + + ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + printf("Error: PTRACE_TRACEME failed\n"); + return 0; + } + kill(getpid(), SIGUSR1); /* --> parent (SIGUSR1) */ + + return 0; +} + +static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, + __u64 addr, int len) +{ + info->version = 1; + info->trigger_type = type; + info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info->addr = addr; + info->addr2 = addr + len; + info->condition_value = 0; + if (!len) + info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; + else + info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; +} + +static int ptrace_open(pid_t child_pid, __u64 wp_addr, int len) +{ + struct ppc_hw_breakpoint info; + + ptrace_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); + return ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); +} + +static int test1(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing cpu event by perf) + * if (addr range overlaps) + * fail; + */ + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd > 0 || errno != ENOSPC) + ret = -1; + + close(perf_fd); + return ret; +} + +static int test2(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing cpu event by perf) + * if (addr range does not overlaps) + * allow; + */ + + perf_fd = perf_cpu_event_open(0, 
(__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + +perf_close: + close(perf_fd); + return ret; +} + +static int test3(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the same thread) + * if (addr range overlaps) + * fail; + */ + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd > 0 || errno != ENOSPC) + ret = -1; + + close(perf_fd); + return ret; +} + +static int test4(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the same thread) + * if (addr range does not overlaps) + * fail; + */ + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, + sizeof(*perf_data2)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + +perf_close: + close(perf_fd); + return ret; +} + +static int test5(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the different thread) + * allow; + */ + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); +perf_close: + close(perf_fd); +kill_child: + kill(cpid, SIGINT); + return ret; +} + +static int test6(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread kernel event by perf) + * if (existing thread event by ptrace on the same thread) + * allow; + * -- OR -- + * if (new per cpu kernel event by perf) + * if (existing thread event by ptrace) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_kernel_event_open(child_pid); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + + perf_fd = perf_cpu_kernel_event_open(0); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test7(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, 
child_pid, 0, ptrace_fd); + return ret; +} + +static int test8(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlaps) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, + sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test9(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the other thread) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + close(perf_fd); + +kill_child: + kill(cpid, SIGINT); + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test10(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test11(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlap) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test12(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test13(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlap) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = 
perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test14(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the other thread) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_cpu_event_open(cpid, 0, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + close(perf_fd); + +kill_child: + kill(cpid, SIGINT); + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int do_test(const char *msg, int (*fun)(pid_t arg), pid_t arg) +{ + int ret; + + ret = fun(arg); + if (ret) + printf("%s: Error\n", msg); + else + printf("%s: Ok\n", msg); + return ret; +} + +char *desc[14] = { + "perf cpu event -> ptrace thread event (Overlapping)", + "perf cpu event -> ptrace thread event (Non-overlapping)", + "perf thread event -> ptrace same thread event (Overlapping)", + "perf thread event -> ptrace same thread event (Non-overlapping)", + "perf thread event -> ptrace other thread event", + "ptrace thread event -> perf kernel event", + "ptrace thread event -> perf same thread event (Overlapping)", + "ptrace thread event -> perf same thread event (Non-overlapping)", + "ptrace thread event -> perf other thread event", + "ptrace thread event -> perf cpu event (Overlapping)", + "ptrace thread event -> perf cpu event (Non-overlapping)", + "ptrace thread event -> perf same thread & cpu event (Overlapping)", + "ptrace thread event -> perf same thread & cpu event (Non-overlapping)", + "ptrace thread event -> perf other thread & cpu event", +}; + +static int test(pid_t child_pid) +{ + int ret = TEST_PASS; + + ret |= do_test(desc[0], test1, child_pid); + ret |= do_test(desc[1], test2, child_pid); + ret |= do_test(desc[2], test3, child_pid); + ret |= do_test(desc[3], test4, child_pid); + ret |= do_test(desc[4], test5, child_pid); + ret |= do_test(desc[5], test6, child_pid); + ret |= do_test(desc[6], test7, child_pid); + ret |= do_test(desc[7], test8, child_pid); + ret |= do_test(desc[8], test9, child_pid); + ret |= do_test(desc[9], test10, child_pid); + ret |= do_test(desc[10], test11, child_pid); + ret |= do_test(desc[11], test12, child_pid); + ret |= do_test(desc[12], test13, child_pid); + ret |= do_test(desc[13], test14, child_pid); + + return ret; +} + +static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) +{ + if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) { + perror("Can't get breakpoint info"); + exit(-1); + } +} + +static int ptrace_perf_hwbreak(void) +{ + int ret; + pid_t child_pid; + struct ppc_debug_info dbginfo; + + child_pid = fork(); + if (!child_pid) + return child(); + + /* parent */ + wait(NULL); /* <-- child (SIGUSR1) */ + + get_dbginfo(child_pid, &dbginfo); + SKIP_IF(dbginfo.num_data_bps <= 1); + + ret = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + SKIP_IF(ret < 0); + close(ret); + + ret = test(child_pid); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + return ret; +} + +int main(int argc, char *argv[]) +{ + return 
test_harness(ptrace_perf_hwbreak, "ptrace-perf-hwbreak"); +} From f3d03fc748d4e48f4cd8dea1bfeb173cb3b0c19f Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 2 Feb 2021 11:21:36 +0800 Subject: [PATCH 289/302] powerpc/eeh: remove unneeded semicolon Eliminate the following coccicheck warning: ./arch/powerpc/kernel/eeh.c:782:2-3: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Reviewed-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612236096-91154-1-git-send-email-yang.lee@linux.alibaba.com --- arch/powerpc/kernel/eeh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 9058a26df29ccc..f24cd53ff26e2e 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -776,7 +776,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat default: eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true); return -EINVAL; - }; + } return 0; } From caea7b833d866e0badf4b12dc41bf9fe6a7295f3 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 2 Feb 2021 11:34:36 +0800 Subject: [PATCH 290/302] powerpc/64s: remove unneeded semicolon Eliminate the following coccicheck warning: ./arch/powerpc/platforms/powernv/setup.c:160:2-3: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612236877-104974-1-git-send-email-yang.lee@linux.alibaba.com --- arch/powerpc/platforms/powernv/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index aadf932c4e61bb..a8db3f15306394 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -157,7 +157,7 @@ static void __init pnv_check_guarded_cores(void) for_each_node_by_type(dn, "cpu") { if (of_property_match_string(dn, "status", "bad") >= 0) bad_count++; - }; + } if (bad_count) { printk(" _ _______________\n"); From 0db11461677aa5105b9ebbd939aee0ceb77a988b Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 8 Feb 2021 18:41:10 +0800 Subject: [PATCH 291/302] selftests/powerpc: remove unneeded semicolon Eliminate the following coccicheck warning: ./tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c:327:4-5: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612780870-95890-1-git-send-email-yang.lee@linux.alibaba.com --- tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c index 02dffb65de48b6..b099753b50e4af 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c @@ -324,7 +324,7 @@ int compress_file(int argc, char **argv, void *handle) fprintf(stderr, "error: cannot progress; "); fprintf(stderr, "too many faults\n"); exit(-1); - }; + } } fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */ From 7f1fa82d79947dfabb4046e1d787da9db2bc1c20 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 16 Feb 2021 14:33:06 +1100 Subject: [PATCH 292/302] powerpc/iommu: Allocate it_map by vmalloc The IOMMU table uses the it_map bitmap to keep track of allocated DMA pages. 
This has always been a contiguous array allocated at either the boot time or when a passed through device is returned to the host OS. The it_map memory is allocated by alloc_pages() which allocates contiguous physical memory. Such allocation method occasionally creates a problem when there is no big chunk of memory available (no free memory or too fragmented). On powernv/ioda2 the default DMA window requires 16MB for it_map. This replaces alloc_pages_node() with vzalloc_node() which allocates contiguous block but in virtual memory. This should reduce changes of failure but should not cause other behavioral changes as it_map is only used by the kernel's DMA hooks/api when MMU is on. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210216033307.69863-2-aik@ozlabs.ru --- arch/powerpc/kernel/iommu.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 2168714144348c..91d0ba7559a629 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -718,7 +718,6 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, { unsigned long sz; static int welcomed = 0; - struct page *page; unsigned int i; struct iommu_pool *p; @@ -727,11 +726,9 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, /* number of bytes needed for the bitmap */ sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); - page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz)); - if (!page) + tbl->it_map = vzalloc_node(sz, nid); + if (!tbl->it_map) panic("iommu_init_table: Can't allocate %ld bytes\n", sz); - tbl->it_map = page_address(page); - memset(tbl->it_map, 0, sz); iommu_table_reserve_pages(tbl, res_start, res_end); @@ -773,8 +770,6 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, static void iommu_table_free(struct kref *kref) { - unsigned long bitmap_sz; - unsigned int order; struct iommu_table *tbl; tbl = container_of(kref, struct iommu_table, it_kref); @@ -795,12 +790,8 @@ static void iommu_table_free(struct kref *kref) if (!bitmap_empty(tbl->it_map, tbl->it_size)) pr_warn("%s: Unexpected TCEs\n", __func__); - /* calculate bitmap size in bytes */ - bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); - /* free bitmap */ - order = get_order(bitmap_sz); - free_pages((unsigned long) tbl->it_map, order); + vfree(tbl->it_map); /* free table */ kfree(tbl); From 4be518d838809e21354f32087aa9c26efc50b410 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 16 Feb 2021 14:33:07 +1100 Subject: [PATCH 293/302] powerpc/iommu: Do not immediately panic when failed IOMMU table allocation Most platforms allocate IOMMU table structures (specifically it_map) at the boot time and when this fails - it is a valid reason for panic(). However the powernv platform allocates it_map after a device is returned to the host OS after being passed through and this happens long after the host OS booted. It is quite possible to trigger the it_map allocation panic() and kill the host even though it is not necessary - the host OS can still use the DMA bypass mode (requires a tiny fraction of it_map's memory) and even if that fails, the host OS is runnnable as it was without the device for which allocating it_map causes the panic. Instead of immediately crashing in a powernv/ioda2 system, this prints an error and continues. All other platforms still call panic(). 
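For readers skimming the per-platform hunks below, here is a condensed, illustrative sketch of the new calling convention (a paraphrase, not an additional hunk): iommu_init_table() now returns NULL instead of panicking when the it_map allocation fails, most callers turn that into the old panic() themselves, and powernv/ioda2 degrades to an error return so the host keeps running in DMA bypass mode.

	/* Illustrative sketch only, condensed from the hunks below. */

	/* Most callers keep the old behaviour and still panic on failure: */
	if (!iommu_init_table(tbl, nid, 0, 0))
		panic("Failed to initialize iommu table");

	/* powernv/ioda2, which can hit this long after boot, degrades gracefully: */
	if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end))
		rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
	else
		rc = -ENOMEM;	/* DMA bypass mode may still be usable */
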
Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Leonardo Bras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210216033307.69863-3-aik@ozlabs.ru --- arch/powerpc/kernel/iommu.c | 6 ++++-- arch/powerpc/platforms/cell/iommu.c | 3 ++- arch/powerpc/platforms/pasemi/iommu.c | 4 +++- arch/powerpc/platforms/powernv/pci-ioda.c | 15 ++++++++------- arch/powerpc/platforms/pseries/iommu.c | 10 +++++++--- arch/powerpc/sysdev/dart_iommu.c | 3 ++- 6 files changed, 26 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 91d0ba7559a629..42e195aaf23a19 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -727,8 +727,10 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); tbl->it_map = vzalloc_node(sz, nid); - if (!tbl->it_map) - panic("iommu_init_table: Can't allocate %ld bytes\n", sz); + if (!tbl->it_map) { + pr_err("%s: Can't allocate %ld bytes\n", __func__, sz); + return NULL; + } iommu_table_reserve_pages(tbl, res_start, res_end); diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 2124831cf57c08..fa08699aedeb8b 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -486,7 +486,8 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, window->table.it_size = size >> window->table.it_page_shift; window->table.it_ops = &cell_iommu_ops; - iommu_init_table(&window->table, iommu->nid, 0, 0); + if (!iommu_init_table(&window->table, iommu->nid, 0, 0)) + panic("Failed to initialize iommu table"); pr_debug("\tioid %d\n", window->ioid); pr_debug("\tblocksize %ld\n", window->table.it_blocksize); diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index b500a6e47e6b11..5be7242fbd866c 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -146,7 +146,9 @@ static void iommu_table_iobmap_setup(void) */ iommu_table_iobmap.it_blocksize = 4; iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops; - iommu_init_table(&iommu_table_iobmap, 0, 0, 0); + if (!iommu_init_table(&iommu_table_iobmap, 0, 0, 0)) + panic("Failed to initialize iommu table"); + pr_debug(" <- %s\n", __func__); } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index f0f901683a2fe1..66c3c333733468 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1762,7 +1762,8 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, tbl->it_ops = &pnv_ioda1_iommu_ops; pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift; pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; - iommu_init_table(tbl, phb->hose->node, 0, 0); + if (!iommu_init_table(tbl, phb->hose->node, 0, 0)) + panic("Failed to initialize iommu table"); pe->dma_setup_done = true; return; @@ -1930,16 +1931,16 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) res_start = pe->phb->ioda.m32_pci_base >> tbl->it_page_shift; res_end = min(window_size, SZ_4G) >> tbl->it_page_shift; } - iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end); - rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); + if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end)) + rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); + else + rc = -ENOMEM; if (rc) { - pe_err(pe, 
"Failed to configure 32-bit TCE table, err %ld\n", - rc); + pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", rc); iommu_tce_table_put(tbl); - return rc; + tbl = NULL; /* This clears iommu_table_base below */ } - if (!pnv_iommu_bypass_disabled) pnv_pci_ioda2_set_bypass(pe, true); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 5b3050ff0c55ba..0c55b991f665b5 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -638,7 +638,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) iommu_table_setparms(pci->phb, dn, tbl); tbl->it_ops = &iommu_table_pseries_ops; - iommu_init_table(tbl, pci->phb->node, 0, 0); + if (!iommu_init_table(tbl, pci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); /* Divide the rest (1.75GB) among the children */ pci->phb->dma_window_size = 0x80000000ul; @@ -720,7 +721,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) iommu_table_setparms_lpar(ppci->phb, pdn, tbl, ppci->table_group, dma_window); tbl->it_ops = &iommu_table_lpar_multi_ops; - iommu_init_table(tbl, ppci->phb->node, 0, 0); + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); iommu_register_group(ppci->table_group, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->table_group); @@ -749,7 +751,9 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) tbl = PCI_DN(dn)->table_group->tables[0]; iommu_table_setparms(phb, dn, tbl); tbl->it_ops = &iommu_table_pseries_ops; - iommu_init_table(tbl, phb->node, 0, 0); + if (!iommu_init_table(tbl, phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + set_iommu_table_base(&dev->dev, tbl); return; } diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 6b4a34b36d9879..1d33b7a5ea8326 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -344,7 +344,8 @@ static void iommu_table_dart_setup(void) iommu_table_dart.it_index = 0; iommu_table_dart.it_blocksize = 1; iommu_table_dart.it_ops = &iommu_dart_ops; - iommu_init_table(&iommu_table_dart, -1, 0, 0); + if (!iommu_init_table(&iommu_table_dart, -1, 0, 0)) + panic("Failed to initialize iommu table"); /* Reserve the last page of the DART to avoid possible prefetch * past the DART mapped area From cc7130bf119add37f36238343a593b71ef6ecc1e Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 1 Mar 2021 17:36:53 +1100 Subject: [PATCH 294/302] powerpc/iommu: Annotate nested lock for lockdep The IOMMU table is divided into pools for concurrent mappings and each pool has a separate spinlock. When taking the ownership of an IOMMU group to pass through a device to a VM, we lock these spinlocks which triggers a false negative warning in lockdep (below). This fixes it by annotating the large pool's spinlock as a nest lock which makes lockdep not complaining when locking nested locks if the nest lock is locked already. 
=== WARNING: possible recursive locking detected 5.11.0-le_syzkaller_a+fstn1 #100 Not tainted -------------------------------------------- qemu-system-ppc/4129 is trying to acquire lock: c0000000119bddb0 (&(p->lock)/1){....}-{2:2}, at: iommu_take_ownership+0xac/0x1e0 but task is already holding lock: c0000000119bdd30 (&(p->lock)/1){....}-{2:2}, at: iommu_take_ownership+0xac/0x1e0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(p->lock)/1); lock(&(p->lock)/1); === Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210301063653.51003-1-aik@ozlabs.ru --- arch/powerpc/kernel/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 42e195aaf23a19..560be519ad0510 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1088,7 +1088,7 @@ int iommu_take_ownership(struct iommu_table *tbl) spin_lock_irqsave(&tbl->large_pool.lock, flags); for (i = 0; i < tbl->nr_pools; i++) - spin_lock(&tbl->pools[i].lock); + spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); iommu_table_release_pages(tbl); @@ -1116,7 +1116,7 @@ void iommu_release_ownership(struct iommu_table *tbl) spin_lock_irqsave(&tbl->large_pool.lock, flags); for (i = 0; i < tbl->nr_pools; i++) - spin_lock(&tbl->pools[i].lock); + spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); memset(tbl->it_map, 0, sz); From ee6b25fa7c037e42cc5f3b5c024b2a779edab6dd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Dec 2020 11:36:08 +0000 Subject: [PATCH 295/302] powerpc/44x: fix spelling mistake in Kconfig "varients" -> "variants" There is a spelling mistake in the Kconfig help text. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201216113608.11812-1-colin.king@canonical.com --- arch/powerpc/platforms/44x/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 7d41e9264510e5..83975ef50975e7 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -5,7 +5,7 @@ config PPC_47x select MPIC help This option enables support for the 47x family of processors and is - not currently compatible with other 44x or 46x varients + not currently compatible with other 44x or 46x variants config BAMBOO bool "Bamboo" From 3c0468d4451eb6b4f6604370639f163f9637a479 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Thu, 18 Mar 2021 14:44:14 -0300 Subject: [PATCH 296/302] powerpc/kernel/iommu: Align size for IOMMU_PAGE_SIZE() to save TCEs Currently both iommu_alloc_coherent() and iommu_free_coherent() align the desired allocation size to PAGE_SIZE, and gets system pages and IOMMU mappings (TCEs) for that value. When IOMMU_PAGE_SIZE < PAGE_SIZE, this behavior may cause unnecessary TCEs to be created for mapping the whole system page. Example: - PAGE_SIZE = 64k, IOMMU_PAGE_SIZE() = 4k - iommu_alloc_coherent() is called for 128 bytes - 1 system page (64k) is allocated - 16 IOMMU pages (16 x 4k) are allocated (16 TCEs used) It would be enough to use a single TCE for this, so 15 TCEs are wasted in the process. Update iommu_*_coherent() to make sure the size alignment happens only for IOMMU_PAGE_SIZE() before calling iommu_alloc() and iommu_free(). 
Also, on iommu_range_alloc(), replace ALIGN(n, 1 << tbl->it_page_shift) with IOMMU_PAGE_ALIGN(n, tbl), which is easier to read and does the same. Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210318174414.684630-1-leobras.c@gmail.com --- arch/powerpc/kernel/iommu.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 560be519ad0510..24208cd00aef67 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -889,6 +889,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, unsigned int order; unsigned int nio_pages, io_order; struct page *page; + size_t size_io = size; size = PAGE_ALIGN(size); order = get_order(size); @@ -915,8 +916,9 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - nio_pages = size >> tbl->it_page_shift; - io_order = get_iommu_order(size, tbl); + size_io = IOMMU_PAGE_ALIGN(size_io, tbl); + nio_pages = size_io >> tbl->it_page_shift; + io_order = get_iommu_order(size_io, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, io_order, 0); if (mapping == DMA_MAPPING_ERROR) { @@ -931,10 +933,9 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, void *vaddr, dma_addr_t dma_handle) { if (tbl) { - unsigned int nio_pages; + size_t size_io = IOMMU_PAGE_ALIGN(size, tbl); + unsigned int nio_pages = size_io >> tbl->it_page_shift; - size = PAGE_ALIGN(size); - nio_pages = size >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); From fc5590fd56c9608f317729b59a56dad2a75d633f Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Thu, 18 Mar 2021 14:44:17 -0300 Subject: [PATCH 297/302] powerpc/kernel/iommu: Use largepool as a last resort when !largealloc As of today, doing iommu_range_alloc() only for !largealloc (npages <= 15) will only be able to use 3/4 of the available pages, given pages on largepool not being available for !largealloc. This could mean some drivers not being able to fully use all the available pages for the DMA window. Add pages on largepool as a last resort for !largealloc, making all pages of the DMA window available. Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210318174414.684630-2-leobras.c@gmail.com --- arch/powerpc/kernel/iommu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 24208cd00aef67..57d6b85e9b964f 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -296,6 +296,15 @@ static unsigned long iommu_range_alloc(struct device *dev, pass++; goto again; + } else if (pass == tbl->nr_pools + 1) { + /* Last resort: try largepool */ + spin_unlock(&pool->lock); + pool = &tbl->large_pool; + spin_lock(&pool->lock); + pool->hint = pool->start; + pass++; + goto again; + } else { /* Give up */ spin_unlock_irqrestore(&(pool->lock), flags); From 30c400886bad4ac1801516683b71d7714bc2b1b1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 24 Apr 2021 10:34:43 +0000 Subject: [PATCH 298/302] powerpc/kasan: Fix shadow start address with modules Modules are now located before kernel, KASAN area has to be extended accordingly. 
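A condensed reading of the one-line change below (illustration only, not an extra hunk): the two commits named in the Fixes tags make book3s/32 and 8xx reserve a module area below the kernel text whenever modules are enabled, so the region KASAN has to shadow starts 256MB below PAGE_OFFSET for any CONFIG_MODULES=y build, not only for book3s/32 with strict kernel RWX:

	#ifdef CONFIG_MODULES
	#define KASAN_KERN_START	ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
	#else
	#define KASAN_KERN_START	PAGE_OFFSET
	#endif
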
Fixes: 80edc68e0479 ("powerpc/32s: Define a MODULE area below kernel text all the time") Fixes: 9132a2e82adc ("powerpc/8xx: Define a MODULE area below kernel text") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c68163065163f303f5af1e4bbdd9f1ce69f0543e.1619260465.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/kasan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 7355ed05e65eda..3c478e5ef24c2a 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -19,7 +19,7 @@ #define KASAN_SHADOW_SCALE_SHIFT 3 -#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_MODULES) && defined(CONFIG_STRICT_KERNEL_RWX) +#ifdef CONFIG_MODULES #define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M) #else #define KASAN_KERN_START PAGE_OFFSET From ee1bc694fbaec1a662770703fc34a74abf418938 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 25 Apr 2021 21:58:31 +1000 Subject: [PATCH 299/302] powerpc/kvm: Fix build error when PPC_MEM_KEYS/PPC_PSERIES=n lkp reported a randconfig failure: In file included from arch/powerpc/include/asm/book3s/64/pkeys.h:6, from arch/powerpc/kvm/book3s_64_mmu_host.c:15: arch/powerpc/include/asm/book3s/64/hash-pkey.h: In function 'hash__vmflag_to_pte_pkey_bits': >> arch/powerpc/include/asm/book3s/64/hash-pkey.h:10:23: error: 'VM_PKEY_BIT0' undeclared 10 | return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT0 : 0x0UL) | | ^~~~~~~~~~~~ We added the include of book3s/64/pkeys.h for pte_to_hpte_pkey_bits(), but that header on its own should only be included when PPC_MEM_KEYS=y. Instead include linux/pkeys.h, which brings in the right definitions when PPC_MEM_KEYS=y and also provides empty stubs when PPC_MEM_KEYS=n. Fixes: e4e8bc1df691 ("powerpc/kvm: Fix PR KVM with KUAP/MEM_KEYS enabled") Cc: stable@vger.kernel.org # v5.11+ Reported-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210425115831.2818434-1-mpe@ellerman.id.au --- arch/powerpc/kvm/book3s_64_mmu_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 5ac66be1cb3c7b..c3e31fef0be1c8 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -8,11 +8,11 @@ */ #include +#include #include #include #include -#include #include #include #include From adb68c38d8d49a3d60805479c558649bb2182473 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 29 Mar 2021 17:01:03 +0530 Subject: [PATCH 300/302] powerpc/papr_scm: Mark nvdimm as unarmed if needed during probe In case an nvdimm is found to be unarmed during probe then set its NDD_UNARMED flag before nvdimm_create(). This would enforce a read-only access to the ndimm region. Presently even if an nvdimm is unarmed its not marked as read-only on ppc64 guests. The patch updates papr_scm_nvdimm_init() to force query of nvdimm health via __drc_pmem_query_health() and if nvdimm is found to be unarmed then set the nvdimm flag ND_UNARMED for nvdimm_create(). 
Signed-off-by: Vaibhav Jain
Reviewed-by: Aneesh Kumar K.V
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210329113103.476760-1-vaibhav@linux.ibm.com
---
 arch/powerpc/platforms/pseries/papr_scm.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 48de2190211679..ef26fe40efb03c 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -947,6 +947,15 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	dimm_flags = 0;
 	set_bit(NDD_LABELING, &dimm_flags);
 
+	/*
+	 * Check if the nvdimm is unarmed. No locking needed as we are still
+	 * initializing. Ignore error encountered if any.
+	 */
+	__drc_pmem_query_health(p);
+
+	if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK)
+		set_bit(NDD_UNARMED, &dimm_flags);
+
 	p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups,
 				  dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
 	if (!p->nvdimm) {

From f9cd5f91a897ea0c45d0059ceeb091cee78c6ebe Mon Sep 17 00:00:00 2001
From: Nathan Chancellor
Date: Mon, 26 Apr 2021 13:35:18 -0700
Subject: [PATCH 301/302] powerpc: Avoid clang uninitialized warning in __get_user_size_allowed

Commit 9975f852ce1b ("powerpc/uaccess: Remove calls to __get_user_bad()
and __put_user_bad()") switched to BUILD_BUG() in the default case, which
leaves x uninitialized. This will not be an issue, because the build will
be broken in that case, but clang runs its static analysis before it
realizes the default case can never be taken, so it warns about x being
used uninitialized (trimmed for brevity):

  In file included from mm/mprotect.c:13:
  In file included from ./include/linux/hugetlb.h:28:
  In file included from ./include/linux/mempolicy.h:16:
  ./include/linux/pagemap.h:772:16: warning: variable '__gu_val' is used
  uninitialized whenever switch default is taken [-Wsometimes-uninitialized]
          if (unlikely(__get_user(c, uaddr) != 0))
                       ^~~~~~~~~~~~~~~~~~~~
  ./arch/powerpc/include/asm/uaccess.h:266:2: note: expanded from macro '__get_user'
          __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \
          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ./arch/powerpc/include/asm/uaccess.h:235:2: note: expanded from macro '__get_user_size_allowed'
          default: BUILD_BUG(); \
          ^~~~~~~

Commit 5cd29b1fd3e8 ("powerpc/uaccess: Use asm goto for get_user when
compiler supports it") added an initialization of x for the same reason.
Do the same thing here so there is no warning across all versions of
clang.
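The pattern can be reproduced outside the kernel with a small sketch
(illustration only; the BUILD_BUG() stand-in below is a link-time trick,
and whether a given clang version warns on the unfixed variant depends on
how much constant folding its -Wsometimes-uninitialized analysis does):

  /* compile with: clang -O2 -Wsometimes-uninitialized -c sketch.c */
  #include <stddef.h>

  /* stand-in for BUILD_BUG(): the call only survives if the branch is
   * reachable after optimization, in which case linking fails */
  extern void build_bug_dummy(void);
  #define BUILD_BUG() build_bug_dummy()

  #define GET_VAL(x, size)                        \
  do {                                            \
          switch (size) {                         \
          case 1: (x) = 1; break;                 \
          case 4: (x) = 4; break;                 \
          default: (x) = 0; BUILD_BUG();          \
          }                                       \
  } while (0)

  int get_int(void)
  {
          int v;

          /* size is a compile-time constant, so "default:" is dead code and
           * build_bug_dummy() is optimized away; the "(x) = 0" exists only to
           * show the front-end analysis that every path initializes v */
          GET_VAL(v, sizeof(v));
          return v;
  }

Initializing the variable in an impossible branch is harmless: the store
is removed together with the dead default case, so the generated code is
unchanged.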
Signed-off-by: Nathan Chancellor
Acked-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://github.com/ClangBuiltLinux/linux/issues/1359
Link: https://lore.kernel.org/r/20210426203518.981550-1-nathan@kernel.org
---
 arch/powerpc/include/asm/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index a4e791bcd3fe24..a09e4240c5b167 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -232,7 +232,7 @@ do { \
 	case 2: __get_user_asm(x, (u16 __user *)ptr, retval, "lhz"); break; \
 	case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break; \
 	case 8: __get_user_asm2(x, (u64 __user *)ptr, retval); break; \
-	default: BUILD_BUG(); \
+	default: x = 0; BUILD_BUG(); \
 	} \
 } while (0)

From 5256426247837feb8703625bda7fcfc824af04cf Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Fri, 23 Apr 2021 13:52:10 +0000
Subject: [PATCH 302/302] powerpc/signal32: Fix erroneous SIGSEGV on RT signal return

The return value of user_read_access_begin() is tested the wrong way
around, leading to a SIGSEGV when the user address is valid and likely an
Oops when the user address is bad. Fix the test.

Fixes: 887f3ceb51cd ("powerpc/signal32: Convert do_setcontext[_tm]() to user access block")
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/a29aadc54c93bcbf069a83615fa102ca0f59c3ae.1619185912.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/signal_32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 94442af383e1f6..915ea2a457a0f5 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -971,7 +971,7 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int
 	sigset_t set;
 	struct mcontext __user *mcp;
 
-	if (user_read_access_begin(ucp, sizeof(*ucp)))
+	if (!user_read_access_begin(ucp, sizeof(*ucp)))
 		return -EFAULT;
 
 	unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
@@ -1009,7 +1009,7 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
 	u32 cmcp;
 	u32 tm_cmcp;
 
-	if (user_read_access_begin(ucp, sizeof(*ucp)))
+	if (!user_read_access_begin(ucp, sizeof(*ucp)))
 		return -EFAULT;
 
 	unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
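For reference, the calling convention being fixed, as a minimal sketch
(illustration only; the helper name is made up, but the '!' test, the
unsafe_* accessor and the failed label mirror do_setcontext() above):
user_read_access_begin() returns non-zero on success, so failure must be
tested with '!'.

  static int read_sigset_sketch(struct ucontext __user *ucp, sigset_t *set)
  {
          if (!user_read_access_begin(ucp, sizeof(*ucp)))
                  return -EFAULT;

          unsafe_get_sigset_t(set, &ucp->uc_sigmask, failed);

          user_read_access_end();
          return 0;

  failed:
          user_read_access_end();
          return -EFAULT;
  }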