From 612ee81b9461475b5a5612c2e8d71559dd3c7920 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Fri, 27 Sep 2019 11:50:02 +0530 Subject: [PATCH 001/144] powerpc/papr_scm: Fix an off-by-one check in papr_scm_meta_{get, set} A validation check to prevent out-of-bounds reads/writes inside the functions papr_scm_meta_{get,set}() is off by one, preventing reads and writes to the last byte of the label area. This bug manifests as a failure to probe a dimm when libnvdimm is unable to read the entire config-area as advertised by ND_CMD_GET_CONFIG_SIZE. This usually happens when there are a large number of namespaces created in the region backed by the dimm and the label-index spans the maximum possible config-area. An error of the form below is usually reported in the kernel logs: [ 255.293912] nvdimm: probe of nmem0 failed with error -22 The patch fixes these validation checks, thereby letting libnvdimm access the entire config-area. Fixes: 53e80bd042773 ('powerpc/nvdimm: Add support for multibyte read/write for metadata') Signed-off-by: Vaibhav Jain Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190927062002.3169-1-vaibhav@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 61883291defc38..ee07d0718bf1a0 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -152,7 +152,7 @@ static int papr_scm_meta_get(struct papr_scm_priv *p, int len, read; int64_t ret; - if ((hdr->in_offset + hdr->in_length) >= p->metadata_size) + if ((hdr->in_offset + hdr->in_length) > p->metadata_size) return -EINVAL; for (len = hdr->in_length; len; len -= read) { @@ -206,7 +206,7 @@ static int papr_scm_meta_set(struct papr_scm_priv *p, __be64 data_be; int64_t ret; - if ((hdr->in_offset + hdr->in_length) >= p->metadata_size) + if ((hdr->in_offset + hdr->in_length) > p->metadata_size) return -EINVAL; for (len = hdr->in_length; len; len -= wrote) { From aaa351504449c4babb80753c72920e4b25fbd8a9 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Wed, 9 Oct 2019 19:34:29 +0530 Subject: [PATCH 002/144] powerpc/configs: add FADump awareness to skiroot_defconfig FADump is supported on the PowerNV platform. For this support to work, the petitboot kernel must be FADump aware. Enable config PRESERVE_FA_DUMP to make the petitboot kernel FADump aware. Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/157062986936.23016.10146169203560084401.stgit@hbathini.in.ibm.com --- arch/powerpc/configs/skiroot_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 1253482a67c0d9..1e18454083ff8f 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -46,6 +46,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_IDLE=y CONFIG_HZ_100=y CONFIG_KEXEC=y +CONFIG_PRESERVE_FA_DUMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_NUMA=y # CONFIG_COMPACTION is not set From cd1d55f16d48d97d681d9534170ce712ac1d09e7 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Wed, 9 Oct 2019 20:57:20 +0530 Subject: [PATCH 003/144] powerpc: make syntax for FADump config options in kernel/Makefile readable The arch/powerpc/kernel/fadump.c file needs to be compiled in if 'config FA_DUMP' or 'config PRESERVE_FA_DUMP' is set. The current syntax achieves that but looks a bit odd.
Fix it for better readability. Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/157063484064.11906.3586824898111397624.stgit@hbathini.in.ibm.com --- arch/powerpc/kernel/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index a7ca8fe623686a..4fdd8a3e775b7c 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -78,9 +78,8 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o -ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),) -obj-y += fadump.o -endif +obj-$(CONFIG_FA_DUMP) += fadump.o +obj-$(CONFIG_PRESERVE_FA_DUMP) += fadump.o ifdef CONFIG_PPC32 obj-$(CONFIG_E500) += idle_e500.o endif From f7a678a8fa548fdd9e89e006f35b0fd60b6f3acc Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 11 Oct 2019 19:30:39 +1100 Subject: [PATCH 004/144] powerpc/udbg: Make it safe to call udbg_printf() always Make udbg_printf() check if udbg_putc is set, and if not just return. This makes it safe to call udbg_printf() anytime, even when a udbg backend has not been registered, which means we can avoid some ifdefs at call sites. Signed-off-by: Qian Cai [mpe: Split out of larger patch, write change log] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/udbg.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index a384e7c8b01cd8..01595e8cafe7b4 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -120,13 +120,15 @@ int udbg_write(const char *s, int n) #define UDBG_BUFSIZE 256 void udbg_printf(const char *fmt, ...) { - char buf[UDBG_BUFSIZE]; - va_list args; + if (udbg_putc) { + char buf[UDBG_BUFSIZE]; + va_list args; - va_start(args, fmt); - vsnprintf(buf, UDBG_BUFSIZE, fmt, args); - udbg_puts(buf); - va_end(args); + va_start(args, fmt); + vsnprintf(buf, UDBG_BUFSIZE, fmt, args); + udbg_puts(buf); + va_end(args); + } } void __init udbg_progress(char *s, unsigned short hex) From 3b9176e9a874a848afa7eb2f6943639eb18b7a17 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Mon, 15 Jul 2019 14:32:32 -0400 Subject: [PATCH 005/144] powerpc/setup_64: fix -Wempty-body warnings At the beginning of setup_64.c, it has, #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) #else #define DBG(fmt...) #endif where DBG() could be compiled away, and generate warnings, arch/powerpc/kernel/setup_64.c: In function 'initialize_cache_info': arch/powerpc/kernel/setup_64.c:579:49: warning: suggest braces around empty body in an 'if' statement [-Wempty-body] DBG("Argh, can't find dcache properties !\n"); ^ arch/powerpc/kernel/setup_64.c:582:49: warning: suggest braces around empty body in an 'if' statement [-Wempty-body] DBG("Argh, can't find icache properties !\n"); Fix it by using the suggestions from Michael: "Neither of those sites should use DBG(), that's not really early boot code, they should just use pr_warn(). And the other uses of DBG() in initialize_cache_info() should just be removed. In smp_release_cpus() the entry/exit DBG's should just be removed, and the spinning_secondaries line should just be pr_debug(). That would just leave the two calls in early_setup(). If we taught udbg_printf() to return early when udbg_putc is NULL, then we could just call udbg_printf() unconditionally and get rid of the DBG macro entirely." 
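For context, a minimal sketch of why the empty DBG() expansion trips -Wempty-body (an illustrative fragment, not the kernel code; found_dcache_props is a made-up condition standing in for the real parse_cache_info() call):

	#define DBG(fmt...)	/* expands to nothing when DEBUG is unset */

	if (!found_dcache_props)
		DBG("Argh, can't find dcache properties !\n");

	/* After preprocessing, the 'if' guards an empty statement:
	 *
	 *	if (!found_dcache_props)
	 *		;
	 *
	 * which is exactly what GCC flags with -Wempty-body.
	 */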
Suggested-by: Michael Ellerman Signed-off-by: Qian Cai [mpe: Split udbg change out into previous patch] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1563215552-8166-1-git-send-email-cai@lca.pw --- arch/powerpc/kernel/setup_64.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 44b4c432a27365..d2af4c228970e6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -68,12 +68,6 @@ #include "setup.h" -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - int spinning_secondaries; u64 ppc64_pft_size; @@ -305,7 +299,7 @@ void __init early_setup(unsigned long dt_ptr) /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr); + udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr); /* * Do early initialization using the flattened device @@ -362,11 +356,11 @@ void __init early_setup(unsigned long dt_ptr) */ this_cpu_enable_ftrace(); - DBG(" <- early_setup()\n"); + udbg_printf(" <- %s()\n", __func__); #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX /* - * This needs to be done *last* (after the above DBG() even) + * This needs to be done *last* (after the above udbg_printf() even) * * Right after we return from this function, we turn on the MMU * which means the real-mode access trick that btext does will @@ -436,8 +430,6 @@ void smp_release_cpus(void) if (!use_spinloop()) return; - DBG(" -> smp_release_cpus()\n"); - /* All secondary cpus are spinning on a common spinloop, release them * all now so they can start to spin on their individual paca * spinloops. For non SMP kernels, the secondary cpus never get out @@ -456,9 +448,7 @@ void smp_release_cpus(void) break; udelay(1); } - DBG("spinning_secondaries = %d\n", spinning_secondaries); - - DBG(" <- smp_release_cpus()\n"); + pr_debug("spinning_secondaries = %d\n", spinning_secondaries); } #endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */ @@ -551,8 +541,6 @@ void __init initialize_cache_info(void) struct device_node *cpu = NULL, *l2, *l3 = NULL; u32 pvr; - DBG(" -> initialize_cache_info()\n"); - /* * All shipping POWER8 machines have a firmware bug that * puts incorrect information in the device-tree. This will @@ -576,10 +564,10 @@ void __init initialize_cache_info(void) */ if (cpu) { if (!parse_cache_info(cpu, false, &ppc64_caches.l1d)) - DBG("Argh, can't find dcache properties !\n"); + pr_warn("Argh, can't find dcache properties !\n"); if (!parse_cache_info(cpu, true, &ppc64_caches.l1i)) - DBG("Argh, can't find icache properties !\n"); + pr_warn("Argh, can't find icache properties !\n"); /* * Try to find the L2 and L3 if any. Assume they are @@ -604,8 +592,6 @@ void __init initialize_cache_info(void) cur_cpu_spec->dcache_bsize = dcache_bsize; cur_cpu_spec->icache_bsize = icache_bsize; - - DBG(" <- initialize_cache_info()\n"); } /* From 29674a1c71be710f8418468aa6a8addd6d1aba1c Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 17 Sep 2019 11:22:30 -0400 Subject: [PATCH 006/144] powerpc/pkeys: remove unused pkey_allows_readwrite pkey_allows_readwrite() was first introduced in the commit 5586cf61e108 ("powerpc: introduce execute-only pkey"), but the usage was removed entirely in the commit a4fcc877d4e1 ("powerpc/pkeys: Preallocate execute-only key"). Found by the "-Wunused-function" compiler warning flag. 
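As a minimal illustration of the class of problem -Wunused-function catches (a made-up translation unit, not the pkeys code):

	/* foo.c -- hypothetical file */
	static int helper_never_called(int x)	/* internal linkage, no callers */
	{
		return x > 0;
	}

	/* $ gcc -c -Wunused-function foo.c
	 *    warning: 'helper_never_called' defined but not used
	 *
	 * Because the function is static, the compiler can prove it is dead
	 * code; pkey_allows_readwrite() was in the same situation once its
	 * last caller was removed.
	 */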
Fixes: a4fcc877d4e1 ("powerpc/pkeys: Preallocate execute-only key") Signed-off-by: Qian Cai Acked-by: Ram Pai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1568733750-14580-1-git-send-email-cai@lca.pw --- arch/powerpc/mm/book3s64/pkeys.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index ae7fca40e5b37c..59e0ebbd803695 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -307,16 +307,6 @@ void thread_pkey_regs_init(struct thread_struct *thread) write_iamr(pkey_iamr_mask); } -static inline bool pkey_allows_readwrite(int pkey) -{ - int pkey_shift = pkeyshift(pkey); - - if (!is_pkey_enabled(pkey)) - return true; - - return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift)); -} - int __execute_only_pkey(struct mm_struct *mm) { return mm->context.execute_only_pkey; From a9336ddf448b1cba3080195cec2287af3907236c Mon Sep 17 00:00:00 2001 From: Deb McLemore Date: Sun, 20 May 2018 21:04:38 -0500 Subject: [PATCH 007/144] powerpc/powernv: Add queue mechanism for early messages When issuing a BMC soft poweroff during IPL, the poweroff can be lost, so the machine does not power off. This is because opal messages can be received before the opal-power code has registered its notifiers. Fix it by buffering messages: if we receive a message and do not yet have a handler for that type, store the message and replay it when a handler for that type is registered. Signed-off-by: Deb McLemore [mpe: Single unlock path in opal_message_notifier_register(), tweak comments/formatting and change log.] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1526868278-4204-1-git-send-email-debmc@linux.vnet.ibm.com --- arch/powerpc/platforms/powernv/opal.c | 86 ++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 38e90270280bf6..b1d064842da404 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -35,6 +35,16 @@ #include "powernv.h" +#define OPAL_MSG_QUEUE_MAX 16 + +struct opal_msg_node { + struct list_head list; + struct opal_msg msg; +}; + +static DEFINE_SPINLOCK(msg_list_lock); +static LIST_HEAD(msg_list); + /* /sys/firmware/opal */ struct kobject *opal_kobj; @@ -50,6 +60,8 @@ struct mcheck_recoverable_range { u64 recover_addr; }; +static int msg_list_size; + static struct mcheck_recoverable_range *mc_recoverable_range; static int mc_recoverable_range_len; @@ -237,6 +249,43 @@ static int __init opal_register_exception_handlers(void) } machine_early_initcall(powernv, opal_register_exception_handlers); +static void queue_replay_msg(void *msg) +{ + struct opal_msg_node *msg_node; + + if (msg_list_size < OPAL_MSG_QUEUE_MAX) { + msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC); + if (msg_node) { + INIT_LIST_HEAD(&msg_node->list); + memcpy(&msg_node->msg, msg, sizeof(struct opal_msg)); + list_add_tail(&msg_node->list, &msg_list); + msg_list_size++; + } else + pr_warn_once("message queue no memory\n"); + + if (msg_list_size >= OPAL_MSG_QUEUE_MAX) + pr_warn_once("message queue full\n"); + } +} + +static void dequeue_replay_msg(enum opal_msg_type msg_type) +{ + struct opal_msg_node *msg_node, *tmp; + + list_for_each_entry_safe(msg_node, tmp, &msg_list, list) { + if (be32_to_cpu(msg_node->msg.msg_type) != msg_type) + continue; + + atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type], + msg_type, + &msg_node->msg); + + 
list_del(&msg_node->list); + kfree(msg_node); + msg_list_size--; + } +} + /* * Opal message notifier based on message type. Allow subscribers to get * notified for specific messgae type. */ @@ -244,14 +293,30 @@ machine_early_initcall(powernv, opal_register_exception_handlers); int opal_message_notifier_register(enum opal_msg_type msg_type, struct notifier_block *nb) { + int ret; + unsigned long flags; + if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) { pr_warn("%s: Invalid arguments, msg_type:%d\n", __func__, msg_type); return -EINVAL; } - return atomic_notifier_chain_register( - &opal_msg_notifier_head[msg_type], nb); + spin_lock_irqsave(&msg_list_lock, flags); + ret = atomic_notifier_chain_register( + &opal_msg_notifier_head[msg_type], nb); + + /* + * If the registration succeeded, replay any queued messages that came + * in prior to the notifier chain registration. msg_list_lock held here + * to ensure they're delivered prior to any subsequent messages. + */ + if (ret == 0) + dequeue_replay_msg(msg_type); + + spin_unlock_irqrestore(&msg_list_lock, flags); + + return ret; } EXPORT_SYMBOL_GPL(opal_message_notifier_register); @@ -265,6 +330,23 @@ EXPORT_SYMBOL_GPL(opal_message_notifier_unregister); static void opal_message_do_notify(uint32_t msg_type, void *msg) { + unsigned long flags; + bool queued = false; + + spin_lock_irqsave(&msg_list_lock, flags); + if (opal_msg_notifier_head[msg_type].head == NULL) { + /* + * Queue up the msg since no notifiers have registered + * yet for this msg_type. + */ + queue_replay_msg(msg); + queued = true; + } + spin_unlock_irqrestore(&msg_list_lock, flags); + + if (queued) + return; + /* notify subscribers */ atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type], msg_type, msg); From 4f5c5b76cc00ccf5be89a2b9883feba3baf6eb2e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 14 Oct 2019 10:26:34 +1100 Subject: [PATCH 008/144] selftests/powerpc: Reduce sigfuz runtime to ~60s The default for the sigfuz test is to run for 4000 iterations, but that can take quite a while and the test harness may kill the test. Reduce the number of iterations to 600, which gives a runtime of roughly 1 minute on a Power8 system. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191013234643.3430-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/signal/sigfuz.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/signal/sigfuz.c b/tools/testing/selftests/powerpc/signal/sigfuz.c index dade00c698c285..08f9afe3b95c4c 100644 --- a/tools/testing/selftests/powerpc/signal/sigfuz.c +++ b/tools/testing/selftests/powerpc/signal/sigfuz.c @@ -42,7 +42,7 @@ #include "utils.h" /* Selftest defaults */ -#define COUNT_MAX 4000 /* Number of interactions */ +#define COUNT_MAX 600 /* Number of interactions */ #define THREADS 16 /* Number of threads */ /* Arguments options */ From 96664dee5cf1815777286227b09884b4f019727f Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Sat, 7 Sep 2019 01:11:23 -0500 Subject: [PATCH 009/144] powerpc/xmon: Allow listing and clearing breakpoints in read-only mode Read-only mode should not prevent listing and clearing any active breakpoints. Tested-by: Daniel Axtens Reviewed-by: Daniel Axtens Signed-off-by: Christopher M. 
Riedl Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190907061124.1947-2-cmr@informatik.wtf --- arch/powerpc/xmon/xmon.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index d83364ebc5c5ad..0a0d84261678d8 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1047,10 +1047,6 @@ cmds(struct pt_regs *excp) set_lpp_cmd(); break; case 'b': - if (xmon_is_ro) { - printf(xmon_ro_msg); - break; - } bpt_cmds(); break; case 'C': @@ -1319,11 +1315,16 @@ bpt_cmds(void) struct bpt *bp; cmd = inchar(); + switch (cmd) { #ifndef CONFIG_PPC_8xx static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; int mode; case 'd': /* bd - hardware data breakpoint */ + if (xmon_is_ro) { + printf(xmon_ro_msg); + break; + } if (!ppc_breakpoint_available()) { printf("Hardware data breakpoint not supported on this cpu\n"); break; @@ -1351,6 +1352,10 @@ bpt_cmds(void) break; case 'i': /* bi - hardware instr breakpoint */ + if (xmon_is_ro) { + printf(xmon_ro_msg); + break; + } if (!cpu_has_feature(CPU_FTR_ARCH_207S)) { printf("Hardware instruction breakpoint " "not supported on this cpu\n"); @@ -1409,7 +1414,8 @@ bpt_cmds(void) break; } termch = cmd; - if (!scanhex(&a)) { + + if (xmon_is_ro || !scanhex(&a)) { /* print all breakpoints */ printf(" type address\n"); if (dabr.enabled) { From 69393cb03ccdf29f3b452d3482ef918469d1c098 Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Sat, 7 Sep 2019 01:11:24 -0500 Subject: [PATCH 010/144] powerpc/xmon: Restrict when kernel is locked down Xmon should be either fully or partially disabled depending on the kernel lockdown state. Put xmon into read-only mode for lockdown=integrity and prevent user entry into xmon when lockdown=confidentiality. Xmon checks the lockdown state on every attempted entry: (1) during early xmon'ing (2) when triggered via sysrq (3) when toggled via debugfs (4) when triggered via a previously enabled breakpoint The following lockdown state transitions are handled: (1) lockdown=none -> lockdown=integrity set xmon read-only mode (2) lockdown=none -> lockdown=confidentiality clear all breakpoints, set xmon read-only mode, prevent user re-entry into xmon (3) lockdown=integrity -> lockdown=confidentiality clear all breakpoints, set xmon read-only mode, prevent user re-entry into xmon Suggested-by: Andrew Donnellan Signed-off-by: Christopher M. Riedl Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190907061124.1947-3-cmr@informatik.wtf --- arch/powerpc/xmon/xmon.c | 103 ++++++++++++++++++++++++++++------- include/linux/security.h | 2 + security/lockdown/lockdown.c | 2 + 3 files changed, 86 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0a0d84261678d8..0a438b51dbb52a 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -187,6 +188,8 @@ static void dump_tlb_44x(void); static void dump_tlb_book3e(void); #endif +static void clear_all_bpt(void); + #ifdef CONFIG_PPC64 #define REG "%.16lx" #else @@ -283,10 +286,38 @@ Commands:\n\ " U show uptime information\n" " ? 
help\n" " # n limit output to n lines per page (for dp, dpa, dl)\n" -" zr reboot\n\ - zh halt\n" +" zr reboot\n" +" zh halt\n" ; +#ifdef CONFIG_SECURITY +static bool xmon_is_locked_down(void) +{ + static bool lockdown; + + if (!lockdown) { + lockdown = !!security_locked_down(LOCKDOWN_XMON_RW); + if (lockdown) { + printf("xmon: Disabled due to kernel lockdown\n"); + xmon_is_ro = true; + } + } + + if (!xmon_is_ro) { + xmon_is_ro = !!security_locked_down(LOCKDOWN_XMON_WR); + if (xmon_is_ro) + printf("xmon: Read-only due to kernel lockdown\n"); + } + + return lockdown; +} +#else /* CONFIG_SECURITY */ +static inline bool xmon_is_locked_down(void) +{ + return false; +} +#endif + static struct pt_regs *xmon_regs; static inline void sync(void) @@ -438,7 +469,10 @@ static bool wait_for_other_cpus(int ncpus) return false; } -#endif /* CONFIG_SMP */ +#else /* CONFIG_SMP */ +static inline void get_output_lock(void) {} +static inline void release_output_lock(void) {} +#endif static inline int unrecoverable_excp(struct pt_regs *regs) { @@ -455,6 +489,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) int cmd = 0; struct bpt *bp; long recurse_jmp[JMP_BUF_LEN]; + bool locked_down; unsigned long offset; unsigned long flags; #ifdef CONFIG_SMP @@ -465,6 +500,8 @@ static int xmon_core(struct pt_regs *regs, int fromipi) local_irq_save(flags); hard_irq_disable(); + locked_down = xmon_is_locked_down(); + if (!fromipi) { tracing_enabled = tracing_is_on(); tracing_off(); @@ -518,7 +555,8 @@ static int xmon_core(struct pt_regs *regs, int fromipi) if (!fromipi) { get_output_lock(); - excprint(regs); + if (!locked_down) + excprint(regs); if (bp) { printf("cpu 0x%x stopped at breakpoint 0x%tx (", cpu, BP_NUM(bp)); @@ -570,10 +608,14 @@ static int xmon_core(struct pt_regs *regs, int fromipi) } remove_bpts(); disable_surveillance(); - /* for breakpoint or single step, print the current instr. */ - if (bp || TRAP(regs) == 0xd00) - ppc_inst_dump(regs->nip, 1, 0); - printf("enter ? for help\n"); + + if (!locked_down) { + /* for breakpoint or single step, print curr insn */ + if (bp || TRAP(regs) == 0xd00) + ppc_inst_dump(regs->nip, 1, 0); + printf("enter ? for help\n"); + } + mb(); xmon_gate = 1; barrier(); @@ -597,8 +639,9 @@ static int xmon_core(struct pt_regs *regs, int fromipi) spin_cpu_relax(); touch_nmi_watchdog(); } else { - cmd = cmds(regs); - if (cmd != 0) { + if (!locked_down) + cmd = cmds(regs); + if (locked_down || cmd != 0) { /* exiting xmon */ insert_bpts(); xmon_gate = 0; @@ -635,13 +678,16 @@ static int xmon_core(struct pt_regs *regs, int fromipi) "can't continue\n"); remove_bpts(); disable_surveillance(); - /* for breakpoint or single step, print the current instr. */ - if (bp || TRAP(regs) == 0xd00) - ppc_inst_dump(regs->nip, 1, 0); - printf("enter ? for help\n"); + if (!locked_down) { + /* for breakpoint or single step, print current insn */ + if (bp || TRAP(regs) == 0xd00) + ppc_inst_dump(regs->nip, 1, 0); + printf("enter ? 
for help\n"); + } } - cmd = cmds(regs); + if (!locked_down) + cmd = cmds(regs); insert_bpts(); in_xmon = 0; @@ -670,7 +716,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi) } } #endif - insert_cpu_bpts(); + if (locked_down) + clear_all_bpt(); + else + insert_cpu_bpts(); touch_nmi_watchdog(); local_irq_restore(flags); @@ -3768,6 +3817,11 @@ static void xmon_init(int enable) #ifdef CONFIG_MAGIC_SYSRQ static void sysrq_handle_xmon(int key) { + if (xmon_is_locked_down()) { + clear_all_bpt(); + xmon_init(0); + return; + } /* ensure xmon is enabled */ xmon_init(1); debugger(get_irq_regs()); @@ -3789,7 +3843,6 @@ static int __init setup_xmon_sysrq(void) device_initcall(setup_xmon_sysrq); #endif /* CONFIG_MAGIC_SYSRQ */ -#ifdef CONFIG_DEBUG_FS static void clear_all_bpt(void) { int i; @@ -3807,18 +3860,22 @@ static void clear_all_bpt(void) iabr = NULL; dabr.enabled = 0; } - - printf("xmon: All breakpoints cleared\n"); } +#ifdef CONFIG_DEBUG_FS static int xmon_dbgfs_set(void *data, u64 val) { xmon_on = !!val; xmon_init(xmon_on); /* make sure all breakpoints removed when disabling */ - if (!xmon_on) + if (!xmon_on) { clear_all_bpt(); + get_output_lock(); + printf("xmon: All breakpoints cleared\n"); + release_output_lock(); + } + return 0; } @@ -3844,7 +3901,11 @@ static int xmon_early __initdata; static int __init early_parse_xmon(char *p) { - if (!p || strncmp(p, "early", 5) == 0) { + if (xmon_is_locked_down()) { + xmon_init(0); + xmon_early = 0; + xmon_on = 0; + } else if (!p || strncmp(p, "early", 5) == 0) { /* just "xmon" is equivalent to "xmon=early" */ xmon_init(1); xmon_early = 1; diff --git a/include/linux/security.h b/include/linux/security.h index a8d59d612d274b..79567eacb83429 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -116,12 +116,14 @@ enum lockdown_reason { LOCKDOWN_MODULE_PARAMETERS, LOCKDOWN_MMIOTRACE, LOCKDOWN_DEBUGFS, + LOCKDOWN_XMON_WR, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, LOCKDOWN_BPF_READ, LOCKDOWN_PERF, LOCKDOWN_TRACEFS, + LOCKDOWN_XMON_RW, LOCKDOWN_CONFIDENTIALITY_MAX, }; diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c index 8a10b43daf74e5..c1a9e5f3a19d01 100644 --- a/security/lockdown/lockdown.c +++ b/security/lockdown/lockdown.c @@ -31,12 +31,14 @@ static const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = { [LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters", [LOCKDOWN_MMIOTRACE] = "unsafe mmio", [LOCKDOWN_DEBUGFS] = "debugfs access", + [LOCKDOWN_XMON_WR] = "xmon write access", [LOCKDOWN_INTEGRITY_MAX] = "integrity", [LOCKDOWN_KCORE] = "/proc/kcore access", [LOCKDOWN_KPROBES] = "use of kprobes", [LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM", [LOCKDOWN_PERF] = "unsafe use of perf", [LOCKDOWN_TRACEFS] = "use of tracefs", + [LOCKDOWN_XMON_RW] = "xmon read and write access", [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality", }; From 5f5d6e40a01e70b731df843d8b5a61b4b28b19d9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 17 Sep 2019 18:08:51 +0530 Subject: [PATCH 011/144] powerpc/nvdimm: Update vmemmap_populated to check sub-section range With commit: 7cc7867fb061 ("mm/devm_memremap_pages: enable sub-section remap") pmem namespaces are remapped in 2M chunks. On architectures like ppc64 we can map the memmap area using 16MB hugepage size and that can cover a memory range of 16G. 
While enabling new pmem namespaces, memory is added in sub-section chunks, so before creating a new memmap mapping the kernel should check whether there is an existing memmap mapping covering the new pmem namespace. Currently, this is validated by checking whether the section covering the range is already initialized or not. Considering there can be multiple namespaces in the same section, this can result in incorrect validation. Update this to check for sub-sections in the range. This is done by checking all pfns in the range we are mapping. We could optimize this by checking just one pfn in each sub-section, but since this is not a fast path we keep it simple. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190917123851.22553-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/init_64.c | 54 +++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 4e08246acd79ad..83d8c7122d1327 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -70,31 +70,46 @@ EXPORT_SYMBOL_GPL(kernstart_addr); #ifdef CONFIG_SPARSEMEM_VMEMMAP /* - * Given an address within the vmemmap, determine the pfn of the page that - * represents the start of the section it is within. Note that we have to + * Given an address within the vmemmap, determine the page that + * represents the start of the subsection it is within. Note that we have to * do this by hand as the proffered address may not be correctly aligned. * Subtraction of non-aligned pointers produces undefined results. */ -static unsigned long __meminit vmemmap_section_start(unsigned long page) +static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_addr) { - unsigned long offset = page - ((unsigned long)(vmemmap)); + unsigned long start_pfn; + unsigned long offset = vmemmap_addr - ((unsigned long)(vmemmap)); /* Return the pfn of the start of the section. */ - return (offset / sizeof(struct page)) & PAGE_SECTION_MASK; + start_pfn = (offset / sizeof(struct page)) & PAGE_SUBSECTION_MASK; + return pfn_to_page(start_pfn); } /* - * Check if this vmemmap page is already initialised. If any section - * which overlaps this vmemmap page is initialised then this page is - * initialised already. + * Since memory is added in sub-section chunks, before creating a new vmemmap + * mapping, the kernel should check whether there is an existing memmap mapping + * covering the new subsection added. This is needed because kernel can map + * vmemmap area using 16MB pages which will cover a memory range of 16G. Such + * a range covers multiple subsections (2M) + * + * If any subsection in the 16G range mapped by vmemmap is valid we consider the + * vmemmap populated (There is a page table entry already present). We can't do + * a page table lookup here because with the hash translation we don't keep + * vmemmap details in linux page table. 
*/ -static int __meminit vmemmap_populated(unsigned long start, int page_size) +static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size) { - unsigned long end = start + page_size; - start = (unsigned long)(pfn_to_page(vmemmap_section_start(start))); + struct page *start; + unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size; + start = vmemmap_subsection_start(vmemmap_addr); - for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page))) - if (pfn_valid(page_to_pfn((struct page *)start))) + for (; (unsigned long)start < vmemmap_end; start += PAGES_PER_SUBSECTION) + /* + * pfn valid check here is intended to really check + * whether we have any subsection already initialized + * in this range. + */ + if (pfn_valid(page_to_pfn(start))) return 1; return 0; @@ -201,6 +216,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, void *p = NULL; int rc; + /* + * This vmemmap range is backing different subsections. If any + * of that subsection is marked valid, that means we already + * have initialized a page table covering this range and hence + * the vmemmap range is populated. + */ if (vmemmap_populated(start, page_size)) continue; @@ -290,9 +311,10 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, struct page *page; /* - * the section has already be marked as invalid, so - * vmemmap_populated() true means some other sections still - * in this page, so skip it. + * We have already marked the subsection we are trying to remove + * invalid. So if we want to remove the vmemmap range, we + * need to make sure there is no subsection marked valid + * in this range. */ if (vmemmap_populated(start, page_size)) continue; From c1bc6f93f95970f917caaac544a374862e84df52 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 1 Aug 2019 14:58:55 +1000 Subject: [PATCH 012/144] powerpc/configs: Add debug config fragment Add a debug config fragment that we can use to put useful debug options into. It can be used like: # make foo_defconfig # make debug.config Currently the only option included is to enable debugfs SCOM access. Suggested-by: Michael Ellerman Signed-off-by: Andrew Donnellan [mpe: Drop the special targets, just use the fragment directly] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190801045855.5822-1-ajd@linux.ibm.com --- arch/powerpc/configs/debug.config | 1 + 1 file changed, 1 insertion(+) create mode 100644 arch/powerpc/configs/debug.config diff --git a/arch/powerpc/configs/debug.config b/arch/powerpc/configs/debug.config new file mode 100644 index 00000000000000..a14ae1f20d604d --- /dev/null +++ b/arch/powerpc/configs/debug.config @@ -0,0 +1 @@ +CONFIG_SCOM_DEBUGFS=y From 58b12eb28e34d3dd8a2d6743c26bf941ca1fbf37 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 May 2019 18:16:14 +1000 Subject: [PATCH 013/144] powerpc/configs: Rename foo_basic_defconfig to foo_base.config We have several "defconfigs" that are not actually full defconfigs; they are just a base set of options which are then merged with other fragments to produce a working defconfig. The most obvious example is corenet_basic_defconfig, which only contains one symbol, CONFIG_CORENET_GENERIC=y. And in fact if you build it as a "defconfig", that one symbol ends up undefined, because its prerequisites are missing. There is also mpc85xx_basic_defconfig, which doesn't actually enable CONFIG_PPC_85xx. 
To avoid confusion, rename these config fragments to "foo_base.config" to make it clearer that they are not full defconfigs and are instead just fragments that are used to generate real defconfigs. Reported-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190528081614.26096-1-mpe@ellerman.id.au --- arch/powerpc/Makefile | 12 ++++++------ .../{corenet_basic_defconfig => corenet_base.config} | 0 .../{mpc85xx_basic_defconfig => mpc85xx_base.config} | 0 .../{mpc86xx_basic_defconfig => mpc86xx_base.config} | 0 4 files changed, 6 insertions(+), 6 deletions(-) rename arch/powerpc/configs/{corenet_basic_defconfig => corenet_base.config} (100%) rename arch/powerpc/configs/{mpc85xx_basic_defconfig => mpc85xx_base.config} (100%) rename arch/powerpc/configs/{mpc86xx_basic_defconfig => mpc86xx_base.config} (100%) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 83522c9fc7b66a..a3ed4f168607be 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -330,32 +330,32 @@ powernv_be_defconfig: PHONY += mpc85xx_defconfig mpc85xx_defconfig: - $(call merge_into_defconfig,mpc85xx_basic_defconfig,\ + $(call merge_into_defconfig,mpc85xx_base.config,\ 85xx-32bit 85xx-hw fsl-emb-nonhw) PHONY += mpc85xx_smp_defconfig mpc85xx_smp_defconfig: - $(call merge_into_defconfig,mpc85xx_basic_defconfig,\ + $(call merge_into_defconfig,mpc85xx_base.config,\ 85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw) PHONY += corenet32_smp_defconfig corenet32_smp_defconfig: - $(call merge_into_defconfig,corenet_basic_defconfig,\ + $(call merge_into_defconfig,corenet_base.config,\ 85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw dpaa) PHONY += corenet64_smp_defconfig corenet64_smp_defconfig: - $(call merge_into_defconfig,corenet_basic_defconfig,\ + $(call merge_into_defconfig,corenet_base.config,\ 85xx-64bit 85xx-smp altivec 85xx-hw fsl-emb-nonhw dpaa) PHONY += mpc86xx_defconfig mpc86xx_defconfig: - $(call merge_into_defconfig,mpc86xx_basic_defconfig,\ + $(call merge_into_defconfig,mpc86xx_base.config,\ 86xx-hw fsl-emb-nonhw) PHONY += mpc86xx_smp_defconfig mpc86xx_smp_defconfig: - $(call merge_into_defconfig,mpc86xx_basic_defconfig,\ + $(call merge_into_defconfig,mpc86xx_base.config,\ 86xx-smp 86xx-hw fsl-emb-nonhw) PHONY += ppc32_allmodconfig diff --git a/arch/powerpc/configs/corenet_basic_defconfig b/arch/powerpc/configs/corenet_base.config similarity index 100% rename from arch/powerpc/configs/corenet_basic_defconfig rename to arch/powerpc/configs/corenet_base.config diff --git a/arch/powerpc/configs/mpc85xx_basic_defconfig b/arch/powerpc/configs/mpc85xx_base.config similarity index 100% rename from arch/powerpc/configs/mpc85xx_basic_defconfig rename to arch/powerpc/configs/mpc85xx_base.config diff --git a/arch/powerpc/configs/mpc86xx_basic_defconfig b/arch/powerpc/configs/mpc86xx_base.config similarity index 100% rename from arch/powerpc/configs/mpc86xx_basic_defconfig rename to arch/powerpc/configs/mpc86xx_base.config From eb8e20f89093b64f48975c74ccb114e6775cee22 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 13 Oct 2019 21:23:51 +1100 Subject: [PATCH 014/144] powerpc/pseries: Mark accumulate_stolen_time() as notrace accumulate_stolen_time() is called prior to interrupt state being reconciled, which can trip the warning in arch_local_irq_restore(): WARNING: CPU: 5 PID: 1017 at arch/powerpc/kernel/irq.c:258 .arch_local_irq_restore+0x9c/0x130 ... 
NIP .arch_local_irq_restore+0x9c/0x130 LR .rb_start_commit+0x38/0x80 Call Trace: .ring_buffer_lock_reserve+0xe4/0x620 .trace_function+0x44/0x210 .function_trace_call+0x148/0x170 .ftrace_ops_no_ops+0x180/0x1d0 ftrace_call+0x4/0x8 .accumulate_stolen_time+0x1c/0xb0 decrementer_common+0x124/0x160 For now just mark it as notrace. We may change the ordering to call it after interrupt state has been reconciled, but that is a larger change. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191024055932.27940-1-mpe@ellerman.id.au --- arch/powerpc/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 694522308cd51d..968ae97382b4ea 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -232,7 +232,7 @@ static u64 scan_dispatch_log(u64 stop_tb) * Accumulate stolen time by scanning the dispatch trace log. * Called on entry from user mode. */ -void accumulate_stolen_time(void) +void notrace accumulate_stolen_time(void) { u64 sst, ust; unsigned long save_irq_soft_mask = irq_soft_mask_return(); From 82ce028ad26dd075b06285ef61a854a564d910fb Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Oct 2019 15:05:40 +0530 Subject: [PATCH 015/144] powerpc/pseries: Don't opencode HPTE_V_BOLTED No functional change in this patch. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191024093542.29777-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/platforms/pseries/lpar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f87a5c64e24dcf..3126fc02e50bf2 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -774,7 +774,7 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group) /* don't remove a bolted entry */ lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, - (0x1UL << 4), &dummy1, &dummy2); + HPTE_V_BOLTED, &dummy1, &dummy2); if (lpar_rc == H_SUCCESS) return i; From 75838a3290cd4ebbd1f567f310ba04b6ef017ce4 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Oct 2019 15:05:41 +0530 Subject: [PATCH 016/144] powerpc/pseries: Don't fail hash page table insert for bolted mapping If the hypervisor returns H_PTEG_FULL for the H_ENTER hcall, retry the hash page table insert by removing a random entry from the group. After some runtime, it is quite possible to find all 8 hash page table entry slots in the hpte group used for the mapping. Don't fail a bolted entry insert in that case. With storage class memory a user can hit this error easily, since a namespace enable/disable is equivalent to memory add/remove. This results in failures as reported below: $ ndctl create-namespace -r region1 -t pmem -m devdax -a 65536 -s 100M libndctl: ndctl_dax_enable: dax1.3: failed to enable Error: namespace1.2: failed to enable failed to create namespace: No such device or address In the kernel log we find details such as: Unable to create mapping for hot added memory 0xc000042006000000..0xc00004200d000000: -1 dax_pmem: probe of dax1.3 failed with error -14 This indicates that we failed to create a bolted hash table entry for the direct-map address backing the namespace. We also observe failures such that not all namespaces will be enabled with the ndctl enable-namespace all command. 
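In outline, the fix in the hunk below has this shape (a simplified sketch; hpte_insert/hpte_remove stand in for the mmu_hash_ops callbacks used in the real code):

	ret = hpte_insert(hpteg, ...);		/* all 8 slots full => -1 */
	if (ret == -1) {
		/* evict one (non-bolted) entry, then retry the insert */
		if (hpte_remove(hpteg) != -1)
			ret = hpte_insert(hpteg, ...);
	}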
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191024093542.29777-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/hash_utils.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 6c123760164e8f..6e5a769ebcb802 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -294,7 +294,14 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, HPTE_V_BOLTED, psize, psize, ssize); - + if (ret == -1) { + /* Try to remove a non bolted entry */ + ret = mmu_hash_ops.hpte_remove(hpteg); + if (ret != -1) + ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, + HPTE_V_BOLTED, psize, psize, + ssize); + } if (ret < 0) break; From d78d5dace5398b542fd5a21b50db6e88ce7d392e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Oct 2019 15:05:42 +0530 Subject: [PATCH 017/144] powerpc/book3s64/hash: Use secondary hash for bolted mapping if the primary is full For bolted hash page table entries, the kernel currently only uses the primary hash group when inserting the hash page table entry. In the rare case where the kernel finds all 8 primary hash slots occupied by bolted entries, this can result in a hash page table insert failure for bolted entries. Avoid this by using the secondary hash group. This is different from what the kernel does for non-bolted mappings: with non-bolted entries the kernel will try the secondary group before removing an existing entry from the hash page table group. With bolted entries we prefer the primary hash group, and hence try to insert the page table entry by removing a slot from the primary group before trying the secondary hash group. 
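For reference, both hash groups are derived from the same hash value; the secondary group simply uses the bitwise complement of the hash (this mirrors the expressions added in the diffs below):

	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
	primary_group   = ( hash & htab_hash_mask) * HPTES_PER_GROUP;
	secondary_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;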
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191024093542.29777-3-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/hash_native.c | 38 ++++++++++++++++++++------ arch/powerpc/mm/book3s64/hash_utils.c | 13 ++++++++- arch/powerpc/platforms/pseries/lpar.c | 14 ++++++++-- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 523e42eb11daa4..d2d8237ea9d59d 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -482,19 +482,12 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, return ret; } -static long native_hpte_find(unsigned long vpn, int psize, int ssize) +static long __native_hpte_find(unsigned long want_v, unsigned long slot) { struct hash_pte *hptep; - unsigned long hash; + unsigned long hpte_v; unsigned long i; - long slot; - unsigned long want_v, hpte_v; - hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); - want_v = hpte_encode_avpn(vpn, psize, ssize); - - /* Bolted mappings are only ever in the primary group */ - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; for (i = 0; i < HPTES_PER_GROUP; i++) { hptep = htab_address + slot; @@ -508,6 +501,33 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) return -1; } +static long native_hpte_find(unsigned long vpn, int psize, int ssize) +{ + unsigned long hpte_group; + unsigned long want_v; + unsigned long hash; + long slot; + + hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); + + /* + * We try to keep bolted entries always in primary hash + * But in some case we can find them in secondary too. + */ + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot = __native_hpte_find(want_v, hpte_group); + if (slot < 0) { + /* Try in secondary */ + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; + slot = __native_hpte_find(want_v, hpte_group); + if (slot < 0) + return -1; + } + + return slot; +} + /* * Update the page protection bits. Intended to be used to create * guard pages for kernel data structures on pages which are bolted diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 6e5a769ebcb802..a9d1f72de8484f 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -263,6 +263,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long vsid = get_kernel_vsid(vaddr, ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, ssize); unsigned long tprot = prot; + bool secondary_hash = false; /* * If we hit a bad address return error. @@ -291,17 +292,27 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); BUG_ON(!mmu_hash_ops.hpte_insert); +repeat: ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, HPTE_V_BOLTED, psize, psize, ssize); if (ret == -1) { - /* Try to remove a non bolted entry */ + /* + * Try to to keep bolted entries in primary. 
+ * Remove non bolted entries and try insert again + */ ret = mmu_hash_ops.hpte_remove(hpteg); if (ret != -1) ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, HPTE_V_BOLTED, psize, psize, ssize); + if (ret == -1 && !secondary_hash) { + secondary_hash = true; + hpteg = ((~hash & htab_hash_mask) * HPTES_PER_GROUP); + goto repeat; + } } + if (ret < 0) break; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 3126fc02e50bf2..74c59a1e96278f 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -938,11 +938,19 @@ static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize) hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); want_v = hpte_encode_avpn(vpn, psize, ssize); - /* Bolted entries are always in the primary group */ + /* + * We try to keep bolted entries always in primary hash + * But in some case we can find them in secondary too. + */ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot = __pSeries_lpar_hpte_find(want_v, hpte_group); - if (slot < 0) - return -1; + if (slot < 0) { + /* Try in secondary */ + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; + slot = __pSeries_lpar_hpte_find(want_v, hpte_group); + if (slot < 0) + return -1; + } return hpte_group + slot; } From a02cbc7ffe529ed58b6bbe54652104fc2c88bd77 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 14 Oct 2019 13:30:43 +1100 Subject: [PATCH 018/144] selftests/powerpc: Fixup clobbers for TM tests Some of our TM (Transactional Memory) tests list "r1" (the stack pointer) as a clobbered register. GCC >= 9 doesn't accept this, and the build breaks: ptrace-tm-spd-tar.c: In function 'tm_spd_tar': ptrace-tm-spd-tar.c:31:2: error: listing the stack pointer register 'r1' in a clobber list is deprecated [-Werror=deprecated] 31 | asm __volatile__( | ^~~ ptrace-tm-spd-tar.c:31:2: note: the value of the stack pointer after an 'asm' statement must be the same as it was before the statement We do have some fairly large inline asm blocks in these tests, and some of them do change the value of r1. However they should all return to C with the value in r1 restored, so I think it's legitimate to say r1 is not clobbered. As Segher points out, the r1 clobbers may have been added because of the use of `or 1,1,1`; however, that doesn't actually clobber r1. Segher also points out that some of these tests do clobber LR, because they call functions, and that is not listed in the clobbers, so add that where appropriate. 
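As a general illustration of the rule being applied (a hand-written sketch, not one of the test's actual asm blocks; some_helper is a hypothetical function): an asm statement may move r1 as long as it restores it before control returns to C, but a function call from asm clobbers the link register, so "lr" must be listed:

	asm volatile(
		"stdu 1, -128(1);"	/* move the stack pointer ... */
		"bl some_helper;"	/* the call clobbers LR */
		"addi 1, 1, 128;"	/* ... and restore r1 before returning to C */
		: /* no outputs */
		: /* no inputs */
		: "memory", "lr"	/* LR listed; r1 restored, so not listed */
	);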
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191029095324.14669-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c | 2 +- tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c | 4 ++-- tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c | 2 +- tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c index 25e23e73c72e6c..2ecfa1158e2bcf 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c @@ -73,7 +73,7 @@ void tm_spd_tar(void) [sprn_texasr]"i"(SPRN_TEXASR), [tar_1]"i"(TAR_1), [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2), [dscr_2]"i"(DSCR_2), [tar_3]"i"(TAR_3), [dscr_3]"i"(DSCR_3) - : "memory", "r0", "r1", "r3", "r4", "r5", "r6" + : "memory", "r0", "r3", "r4", "r5", "r6", "lr" ); /* TM failed, analyse */ diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c index f603fe5a445b3f..6f7fb51f08099c 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c @@ -74,8 +74,8 @@ void tm_spd_vsx(void) "3: ;" : [res] "=r" (result), [texasr] "=r" (texasr) : [sprn_texasr] "i" (SPRN_TEXASR) - : "memory", "r0", "r1", "r3", "r4", - "r7", "r8", "r9", "r10", "r11" + : "memory", "r0", "r3", "r4", + "r7", "r8", "r9", "r10", "r11", "lr" ); if (result) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c index e0d37f07bdeba4..46ef378a15ecc6 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c @@ -62,7 +62,7 @@ void tm_tar(void) [sprn_ppr]"i"(SPRN_PPR), [sprn_texasr]"i"(SPRN_TEXASR), [tar_1]"i"(TAR_1), [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2), [dscr_2]"i"(DSCR_2), [cptr1] "b" (&cptr[1]) - : "memory", "r0", "r1", "r3", "r4", "r5", "r6" + : "memory", "r0", "r3", "r4", "r5", "r6" ); /* TM failed, analyse */ diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c index 8027457b97b7c9..70ca01234f79c4 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c @@ -62,8 +62,8 @@ void tm_vsx(void) "3: ;" : [res] "=r" (result), [texasr] "=r" (texasr) : [sprn_texasr] "i" (SPRN_TEXASR), [cptr1] "b" (&cptr[1]) - : "memory", "r0", "r1", "r3", "r4", - "r7", "r8", "r9", "r10", "r11" + : "memory", "r0", "r3", "r4", + "r7", "r8", "r9", "r10", "r11", "lr" ); if (result) { From c790c3d2b0ec5979d83451d0688d1cd07e23d8ba Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 20 May 2019 20:55:20 +1000 Subject: [PATCH 019/144] selftests/powerpc: Add a test of spectre_v2 mitigations This test uses the PMU to count branch prediction hits/misses for a known loop, and compare the result to the reported spectre v2 mitigation. This gives us a way of sanity checking that the reported mitigation is actually in effect. 
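In outline, the measurement is just a pair of raw PMU counters read around a fixed, branch-heavy loop. Below is a standalone sketch using perf_event_open(2) directly (the test proper uses the selftest event helpers shown in the diff; the raw codes are the PM_BR_PRED_CCACHE/PM_BR_MPRED_CCACHE values the test also defines):

	#include <linux/perf_event.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int open_raw_counter(unsigned long config)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.type = PERF_TYPE_RAW;
		attr.size = sizeof(attr);
		attr.config = config;		/* raw PMU event code */
		attr.exclude_kernel = 1;	/* userspace only */
		attr.exclude_hv = 1;

		return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	}

	/* ... in the caller ... */
	long long pred, mpred;
	int fd_pred  = open_raw_counter(0x040a4);	/* PM_BR_PRED_CCACHE */
	int fd_mpred = open_raw_counter(0x040ac);	/* PM_BR_MPRED_CCACHE */

	/* run the branch-heavy loop here */

	read(fd_pred, &pred, sizeof(pred));
	read(fd_mpred, &mpred, sizeof(mpred));
	/* miss percent = 100 * mpred / pred */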
Sample output for some cases, eg: Power9: sysfs reports: 'Vulnerable' PM_BR_PRED_CCACHE: result 368 running/enabled 5792777124 PM_BR_MPRED_CCACHE: result 319 running/enabled 5792775546 PM_BR_PRED_PCACHE: result 2147483281 running/enabled 5792773128 PM_BR_MPRED_PCACHE: result 213604201 running/enabled 5792771640 Miss percent 9 % OK - Measured branch prediction rates match reported spectre v2 mitigation. sysfs reports: 'Mitigation: Indirect branch serialisation (kernel only)' PM_BR_PRED_CCACHE: result 895 running/enabled 5780320920 PM_BR_MPRED_CCACHE: result 822 running/enabled 5780312414 PM_BR_PRED_PCACHE: result 2147482754 running/enabled 5780308836 PM_BR_MPRED_PCACHE: result 213639731 running/enabled 5780307912 Miss percent 9 % OK - Measured branch prediction rates match reported spectre v2 mitigation. sysfs reports: 'Mitigation: Indirect branch cache disabled' PM_BR_PRED_CCACHE: result 2147483649 running/enabled 20540186160 PM_BR_MPRED_CCACHE: result 2147483649 running/enabled 20540180056 PM_BR_PRED_PCACHE: result 0 running/enabled 20540176090 PM_BR_MPRED_PCACHE: result 0 running/enabled 20540174182 Miss percent 100 % OK - Measured branch prediction rates match reported spectre v2 mitigation. Power8: sysfs reports: 'Vulnerable' PM_BR_PRED_CCACHE: result 2147483649 running/enabled 3505888142 PM_BR_MPRED_CCACHE: result 9 running/enabled 3505882788 Miss percent 0 % OK - Measured branch prediction rates match reported spectre v2 mitigation. sysfs reports: 'Mitigation: Indirect branch cache disabled' PM_BR_PRED_CCACHE: result 2147483649 running/enabled 16931421988 PM_BR_MPRED_CCACHE: result 2147483649 running/enabled 16931416478 Miss percent 100 % OK - Measured branch prediction rates match reported spectre v2 mitigation. success: spectre_v2 Signed-off-by: Michael Ellerman Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190520105520.22274-1-mpe@ellerman.id.au --- .../testing/selftests/powerpc/include/utils.h | 1 + .../selftests/powerpc/security/Makefile | 3 +- .../selftests/powerpc/security/branch_loops.S | 82 +++++++ .../selftests/powerpc/security/spectre_v2.c | 218 ++++++++++++++++++ tools/testing/selftests/powerpc/utils.c | 20 ++ 5 files changed, 323 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/security/branch_loops.S create mode 100644 tools/testing/selftests/powerpc/security/spectre_v2.c diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 0e2b2e6284ac6a..e089a0c30d9af6 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -34,6 +34,7 @@ int pick_online_cpu(void); int read_debugfs_file(char *debugfs_file, int *result); int write_debugfs_file(char *debugfs_file, int result); +int read_sysfs_file(char *debugfs_file, char *result, size_t result_size); void set_dscr(unsigned long val); int perf_event_open_counter(unsigned int type, unsigned long config, int group_fd); diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index 85861c46b4457d..d68e6031695c40 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ -TEST_GEN_PROGS := rfi_flush +TEST_GEN_PROGS := rfi_flush spectre_v2 top_srcdir = ../../../../.. 
CFLAGS += -I../../../../../usr/include @@ -8,3 +8,4 @@ CFLAGS += -I../../../../../usr/include include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c +$(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S diff --git a/tools/testing/selftests/powerpc/security/branch_loops.S b/tools/testing/selftests/powerpc/security/branch_loops.S new file mode 100644 index 00000000000000..22e9204e342140 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/branch_loops.S @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2019, Michael Ellerman, IBM Corp. + */ + +#include + + .data + +jump_table: + .long 0x0 + .long (.Lstate_1 - .Lstate_0) + .long (.Lstate_2 - .Lstate_0) + .long (.Lstate_3 - .Lstate_0) + .long (.Lstate_4 - .Lstate_0) + .long (.Lstate_5 - .Lstate_0) + .long (.Lstate_6 - .Lstate_0) + .long (.Lstate_7 - .Lstate_0) + + .text + +#define ITER_SHIFT 31 + +.macro state number + .balign 32 +.Lstate_\number: + .if \number==7 + li r3, 0 + .else + li r3, \number+1 + .endif + b .Lloop +.endm + +FUNC_START(pattern_cache_loop) + li r3, 0 + li r4, 1 + sldi r4, r4, ITER_SHIFT + +.Lloop: cmpdi r4, 0 + beqlr + + addi r4, r4, -1 + + ld r6, jump_table@got(%r2) + sldi r5, r3, 2 + lwax r6, r5, r6 + ld r7, .Lstate_0@got(%r2) + add r6, r6, r7 + mtctr r6 + bctr + + state 0 + state 1 + state 2 + state 3 + state 4 + state 5 + state 6 + state 7 + +FUNC_END(pattern_cache_loop) + + +FUNC_START(indirect_branch_loop) + li r3, 1 + sldi r3, r3, ITER_SHIFT + +1: cmpdi r3, 0 + beqlr + + addi r3, r3, -1 + + ld r4, 2f@got(%r2) + mtctr r4 + bctr + + .balign 32 +2: b 1b + +FUNC_END(indirect_branch_loop) diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c new file mode 100644 index 00000000000000..8c6b982af2a899 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018-2019 IBM Corporation. 
+ */ + +#define __SANE_USERSPACE_TYPES__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" + +#include "../pmu/event.h" + + +extern void pattern_cache_loop(void); +extern void indirect_branch_loop(void); + +static int do_count_loop(struct event *events, bool is_p9, s64 *miss_percent) +{ + u64 pred, mpred; + + prctl(PR_TASK_PERF_EVENTS_ENABLE); + + if (is_p9) + pattern_cache_loop(); + else + indirect_branch_loop(); + + prctl(PR_TASK_PERF_EVENTS_DISABLE); + + event_read(&events[0]); + event_read(&events[1]); + + // We could scale all the events by running/enabled but we're lazy + // As long as the PMU is uncontended they should all run + FAIL_IF(events[0].result.running != events[0].result.enabled); + FAIL_IF(events[1].result.running != events[1].result.enabled); + + pred = events[0].result.value; + mpred = events[1].result.value; + + if (is_p9) { + event_read(&events[2]); + event_read(&events[3]); + FAIL_IF(events[2].result.running != events[2].result.enabled); + FAIL_IF(events[3].result.running != events[3].result.enabled); + + pred += events[2].result.value; + mpred += events[3].result.value; + } + + *miss_percent = 100 * mpred / pred; + + return 0; +} + +static void setup_event(struct event *e, u64 config, char *name) +{ + event_init_named(e, config, name); + + e->attr.disabled = 1; + e->attr.exclude_kernel = 1; + e->attr.exclude_hv = 1; + e->attr.exclude_idle = 1; +} + +enum spectre_v2_state { + VULNERABLE = 0, + UNKNOWN = 1, // Works with FAIL_IF() + NOT_AFFECTED, + BRANCH_SERIALISATION, + COUNT_CACHE_DISABLED, + COUNT_CACHE_FLUSH_SW, + COUNT_CACHE_FLUSH_HW, + BTB_FLUSH, +}; + +static enum spectre_v2_state get_sysfs_state(void) +{ + enum spectre_v2_state state = UNKNOWN; + char buf[256]; + int len; + + memset(buf, 0, sizeof(buf)); + FAIL_IF(read_sysfs_file("devices/system/cpu/vulnerabilities/spectre_v2", buf, sizeof(buf))); + + // Make sure it's NULL terminated + buf[sizeof(buf) - 1] = '\0'; + + // Trim the trailing newline + len = strlen(buf); + FAIL_IF(len < 1); + buf[len - 1] = '\0'; + + printf("sysfs reports: '%s'\n", buf); + + // Order matters + if (strstr(buf, "Vulnerable")) + state = VULNERABLE; + else if (strstr(buf, "Not affected")) + state = NOT_AFFECTED; + else if (strstr(buf, "Indirect branch serialisation (kernel only)")) + state = BRANCH_SERIALISATION; + else if (strstr(buf, "Indirect branch cache disabled")) + state = COUNT_CACHE_DISABLED; + else if (strstr(buf, "Software count cache flush (hardware accelerated)")) + state = COUNT_CACHE_FLUSH_HW; + else if (strstr(buf, "Software count cache flush")) + state = COUNT_CACHE_FLUSH_SW; + else if (strstr(buf, "Branch predictor state flush")) + state = BTB_FLUSH; + + return state; +} + +#define PM_BR_PRED_CCACHE 0x040a4 // P8 + P9 +#define PM_BR_MPRED_CCACHE 0x040ac // P8 + P9 +#define PM_BR_PRED_PCACHE 0x048a0 // P9 only +#define PM_BR_MPRED_PCACHE 0x048b0 // P9 only + +#define SPRN_PVR 287 + +int spectre_v2_test(void) +{ + enum spectre_v2_state state; + struct event events[4]; + s64 miss_percent; + bool is_p9; + + state = get_sysfs_state(); + if (state == UNKNOWN) { + printf("Error: couldn't determine spectre_v2 mitigation state?\n"); + return -1; + } + + memset(events, 0, sizeof(events)); + + setup_event(&events[0], PM_BR_PRED_CCACHE, "PM_BR_PRED_CCACHE"); + setup_event(&events[1], PM_BR_MPRED_CCACHE, "PM_BR_MPRED_CCACHE"); + FAIL_IF(event_open(&events[0])); + FAIL_IF(event_open_with_group(&events[1], events[0].fd) == -1); + + is_p9 = ((mfspr(SPRN_PVR) >> 16) & 0xFFFF) == 
0x4e; + + if (is_p9) { + // Count pattern cache too + setup_event(&events[2], PM_BR_PRED_PCACHE, "PM_BR_PRED_PCACHE"); + setup_event(&events[3], PM_BR_MPRED_PCACHE, "PM_BR_MPRED_PCACHE"); + + FAIL_IF(event_open_with_group(&events[2], events[0].fd) == -1); + FAIL_IF(event_open_with_group(&events[3], events[0].fd) == -1); + } + + FAIL_IF(do_count_loop(events, is_p9, &miss_percent)); + + event_report_justified(&events[0], 18, 10); + event_report_justified(&events[1], 18, 10); + event_close(&events[0]); + event_close(&events[1]); + + if (is_p9) { + event_report_justified(&events[2], 18, 10); + event_report_justified(&events[3], 18, 10); + event_close(&events[2]); + event_close(&events[3]); + } + + printf("Miss percent %lld %%\n", miss_percent); + + switch (state) { + case VULNERABLE: + case NOT_AFFECTED: + case COUNT_CACHE_FLUSH_SW: + case COUNT_CACHE_FLUSH_HW: + // These should all not affect userspace branch prediction + if (miss_percent > 15) { + printf("Branch misses > 15%% unexpected in this configuration!\n"); + printf("Possible mis-match between reported & actual mitigation\n"); + return 1; + } + break; + case BRANCH_SERIALISATION: + // This seems to affect userspace branch prediction a bit? + if (miss_percent > 25) { + printf("Branch misses > 25%% unexpected in this configuration!\n"); + printf("Possible mis-match between reported & actual mitigation\n"); + return 1; + } + break; + case COUNT_CACHE_DISABLED: + if (miss_percent < 95) { + printf("Branch misses < 20%% unexpected in this configuration!\n"); + printf("Possible mis-match between reported & actual mitigation\n"); + return 1; + } + break; + case UNKNOWN: + case BTB_FLUSH: + printf("Not sure!\n"); + return 1; + } + + printf("OK - Measured branch prediction rates match reported spectre v2 mitigation.\n"); + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(spectre_v2_test, "spectre_v2"); +} diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index c02d24835db462..5ee0e98c489671 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -127,6 +127,26 @@ bool is_ppc64le(void) return strcmp(uts.machine, "ppc64le") == 0; } +int read_sysfs_file(char *fpath, char *result, size_t result_size) +{ + char path[PATH_MAX] = "/sys/"; + int rc = -1, fd; + + strncat(path, fpath, PATH_MAX - strlen(path) - 1); + + if ((fd = open(path, O_RDONLY)) < 0) + return rc; + + rc = read(fd, result, result_size); + + close(fd); + + if (rc < 0) + return rc; + + return 0; +} + int read_debugfs_file(char *debugfs_file, int *result) { int rc = -1, fd; From 5c74f79958682fccd82a6029c53859d1dab3b239 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Sat, 8 Dec 2018 16:46:23 +0100 Subject: [PATCH 020/144] powerpc/ptrace: Add prototype for function pt_regs_check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `pt_regs_check` is a dummy function, its purpose is to break the build if struct pt_regs and struct user_pt_regs don't match. 
This function has no functional purpose, and will get eliminated at link time or after init, depending on CONFIG_LD_DEAD_CODE_DATA_ELIMINATION. This commit adds a prototype to fix a warning at W=1: arch/powerpc/kernel/ptrace.c:3339:13: error: no previous prototype for ‘pt_regs_check’ [-Werror=missing-prototypes] Suggested-by: Christophe Leroy Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20181208154624.6504-1-malat@debian.org
---
 arch/powerpc/kernel/ptrace.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 8c92febf5f4439..b06d8dffd32a8d 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -3361,6 +3361,12 @@ void do_syscall_trace_leave(struct pt_regs *regs)
 	user_enter();
 }

+void __init pt_regs_check(void);
+
+/*
+ * Dummy function, its purpose is to break the build if struct pt_regs and
+ * struct user_pt_regs don't match.
+ */
 void __init pt_regs_check(void)
 {
 	BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=

From b9e0805abf2e92fc275ac5fbd8c1c9a92b00413d Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Wed, 30 Oct 2019 22:12:31 +1100
Subject: [PATCH 021/144] powerpc: Add build-time check of ptrace PT_xx defines

As part of the uapi we export a lot of PT_xx defines for each register in struct pt_regs. These are expressed as an index from gpr[0], in units of unsigned long.

Currently there's nothing tying the values of those defines to the actual layout of the struct. But we *don't* want to change the uapi defines to derive the PT_xx values based on the layout of the struct, those values are ABI and must never change. Instead we want to do the reverse, make sure that the layout of the struct never changes vs the PT_xx defines. So add build time checks of that.

This probably seems paranoid, but at least once in the past someone has sent a patch that would have broken the ABI if it hadn't been spotted. Although it probably would have been detected via testing, it's preferable to just quash any issues at the source.
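To make the intent concrete, the same check can be sketched as a standalone C11 program. The struct, the IDX_* values and CHECK_IDX below are invented for illustration; the kernel does the equivalent with BUILD_BUG_ON inside pt_regs_check():

#include <stddef.h>

/* Invented layout and ABI indices, illustration only. */
struct regs {
	unsigned long gpr[32];
	unsigned long nip;
	unsigned long msr;
};

#define IDX_NIP	32	/* ABI value: index from gpr[0], in unsigned longs */
#define IDX_MSR	33

/* Break the build if the struct layout drifts away from the ABI indices. */
#define CHECK_IDX(idx, field) \
	_Static_assert((idx) == offsetof(struct regs, field) / sizeof(unsigned long), \
		       "ABI index does not match struct layout")

CHECK_IDX(IDX_NIP, nip);
CHECK_IDX(IDX_MSR, msr);

int main(void) { return 0; }

Reordering nip and msr, or inserting a field before them, now fails at compile time instead of silently changing the exported indices.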
Signed-off-by: Michael Ellerman Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191030111231.22720-1-mpe@ellerman.id.au --- arch/powerpc/kernel/ptrace.c | 63 ++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index b06d8dffd32a8d..76724a023b9be8 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -3404,4 +3404,67 @@ void __init pt_regs_check(void) offsetof(struct user_pt_regs, result)); BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs)); + + // Now check that the pt_regs offsets match the uapi #defines + #define CHECK_REG(_pt, _reg) \ + BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \ + sizeof(unsigned long))); + + CHECK_REG(PT_R0, gpr[0]); + CHECK_REG(PT_R1, gpr[1]); + CHECK_REG(PT_R2, gpr[2]); + CHECK_REG(PT_R3, gpr[3]); + CHECK_REG(PT_R4, gpr[4]); + CHECK_REG(PT_R5, gpr[5]); + CHECK_REG(PT_R6, gpr[6]); + CHECK_REG(PT_R7, gpr[7]); + CHECK_REG(PT_R8, gpr[8]); + CHECK_REG(PT_R9, gpr[9]); + CHECK_REG(PT_R10, gpr[10]); + CHECK_REG(PT_R11, gpr[11]); + CHECK_REG(PT_R12, gpr[12]); + CHECK_REG(PT_R13, gpr[13]); + CHECK_REG(PT_R14, gpr[14]); + CHECK_REG(PT_R15, gpr[15]); + CHECK_REG(PT_R16, gpr[16]); + CHECK_REG(PT_R17, gpr[17]); + CHECK_REG(PT_R18, gpr[18]); + CHECK_REG(PT_R19, gpr[19]); + CHECK_REG(PT_R20, gpr[20]); + CHECK_REG(PT_R21, gpr[21]); + CHECK_REG(PT_R22, gpr[22]); + CHECK_REG(PT_R23, gpr[23]); + CHECK_REG(PT_R24, gpr[24]); + CHECK_REG(PT_R25, gpr[25]); + CHECK_REG(PT_R26, gpr[26]); + CHECK_REG(PT_R27, gpr[27]); + CHECK_REG(PT_R28, gpr[28]); + CHECK_REG(PT_R29, gpr[29]); + CHECK_REG(PT_R30, gpr[30]); + CHECK_REG(PT_R31, gpr[31]); + CHECK_REG(PT_NIP, nip); + CHECK_REG(PT_MSR, msr); + CHECK_REG(PT_ORIG_R3, orig_gpr3); + CHECK_REG(PT_CTR, ctr); + CHECK_REG(PT_LNK, link); + CHECK_REG(PT_XER, xer); + CHECK_REG(PT_CCR, ccr); +#ifdef CONFIG_PPC64 + CHECK_REG(PT_SOFTE, softe); +#else + CHECK_REG(PT_MQ, mq); +#endif + CHECK_REG(PT_TRAP, trap); + CHECK_REG(PT_DAR, dar); + CHECK_REG(PT_DSISR, dsisr); + CHECK_REG(PT_RESULT, result); + #undef CHECK_REG + + BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long)); + + /* + * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the + * real registers. + */ + BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long)); } From e44ff9ea8f4c8a90c82f7b85bd4f5e497c841960 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Oct 2019 11:47:30 +1100 Subject: [PATCH 022/144] powerpc/tools: Don't quote $objdump in scripts Some of our scripts are passed $objdump and then call it as "$objdump". This doesn't work if it contains spaces because we're using ccache, for example you get errors such as: ./arch/powerpc/tools/relocs_check.sh: line 48: ccache ppc64le-objdump: No such file or directory ./arch/powerpc/tools/unrel_branch_check.sh: line 26: ccache ppc64le-objdump: No such file or directory Fix it by not quoting the string when we expand it, allowing the shell to do the right thing for us. 
Fixes: a71aa05e1416 ("powerpc: Convert relocs_check to a shell script using grep") Fixes: 4ea80652dc75 ("powerpc/64s: Tool to flag direct branches from unrelocated interrupt vectors") Signed-off-by: Michael Ellerman Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191024004730.32135-1-mpe@ellerman.id.au --- arch/powerpc/tools/relocs_check.sh | 2 +- arch/powerpc/tools/unrel_branch_check.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh index 2b4e959caa365a..7b9fe0a567cf36 100755 --- a/arch/powerpc/tools/relocs_check.sh +++ b/arch/powerpc/tools/relocs_check.sh @@ -20,7 +20,7 @@ objdump="$1" vmlinux="$2" bad_relocs=$( -"$objdump" -R "$vmlinux" | +$objdump -R "$vmlinux" | # Only look at relocation lines. grep -E '\:' | awk '{print $1}' ) BRANCHES=$( -"$objdump" -R "$vmlinux" -D --start-address=0xc000000000000000 \ +$objdump -R "$vmlinux" -D --start-address=0xc000000000000000 \ --stop-address=${end_intr} | grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" | grep -v '\<__start_initialization_multiplatform>' | From 505127068d9b705a6cf335143239db91bfe7bbe2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 5 Nov 2019 10:15:56 +1100 Subject: [PATCH 023/144] selftests/powerpc: Skip tm-signal-sigreturn-nt if TM not available On systems where TM (Transactional Memory) is disabled the tm-signal-sigreturn-nt test causes a SIGILL: test: tm_signal_sigreturn_nt tags: git_version:7c202575ef63 !! child died by signal 4 failure: tm_signal_sigreturn_nt We should skip the test if TM is not available. Fixes: 34642d70ac7e ("selftests/powerpc: Add checks for transactional sigreturn") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191104233524.24348-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c index 56fbf9f6bbf30e..07c388147b75e8 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c @@ -10,10 +10,12 @@ */ #define _GNU_SOURCE +#include #include #include #include "utils.h" +#include "tm.h" void trap_signal_handler(int signo, siginfo_t *si, void *uc) { @@ -29,6 +31,8 @@ int tm_signal_sigreturn_nt(void) { struct sigaction trap_sa; + SKIP_IF(!have_htm()); + trap_sa.sa_flags = SA_SIGINFO; trap_sa.sa_sigaction = trap_signal_handler; From 3366ebe9e19ba3c672a00a6fb86f0ac8636ee989 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 16 Oct 2019 13:36:10 -0500 Subject: [PATCH 024/144] powerpc/pseries: address checkpatch warnings in dlpar_offline_cpu Remove some stray blank lines, convert a printk to pr_warn, and address a line length violation. One functional change: use WARN_ON instead of BUG_ON in case H_PROD of a ceded thread yields an unexpected result from the platform. We can expect this code path to get uninterruptibly stuck in __cpu_die() if this happens, but that's more desirable than crashing. 
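The trade-off is easy to demonstrate outside the kernel. A hedged userspace analogue, with the hcall simulated and fprintf standing in for WARN_ON (a BUG_ON corresponds to aborting the process on the spot):

#include <stdio.h>

static int fake_hcall(void)
{
	return -1;	/* simulate an unexpected hypervisor result */
}

int main(void)
{
	int rc = fake_hcall();

	/* BUG_ON-style: calling abort() here would destroy the very
	 * state you would want to inspect while debugging. */

	/* WARN_ON-style: record the surprise, then carry on with teardown. */
	if (rc != 0)
		fprintf(stderr, "warning: unexpected hcall result %d\n", rc);

	return 0;
}

If the teardown later wedges, the logged warning at least points at the cause.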
Signed-off-by: Nathan Lynch Fixes: b6db63d1a7f0 ("pseries/pseries: Add code to online/offline CPUs of a DLPAR node") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191016183611.10867-2-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index bbda646b63b548..f3f54ef645e124 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -539,7 +539,6 @@ static int dlpar_offline_cpu(struct device_node *dn) goto out; cpu_maps_update_begin(); break; - } /* @@ -547,19 +546,19 @@ static int dlpar_offline_cpu(struct device_node *dn) * Upgrade it's state to CPU_STATE_OFFLINE. */ set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); - BUG_ON(plpar_hcall_norets(H_PROD, thread) - != H_SUCCESS); + WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS); __cpu_die(cpu); break; } - if (cpu == num_possible_cpus()) - printk(KERN_WARNING "Could not find cpu to offline with physical id 0x%x\n", thread); + if (cpu == num_possible_cpus()) { + pr_warn("Could not find cpu to offline with physical id 0x%x\n", + thread); + } } cpu_maps_update_done(); out: return rc; - } static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index) From 80c784282859cc39617b808440a34d0be9502b87 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 16 Oct 2019 13:36:11 -0500 Subject: [PATCH 025/144] powerpc/pseries: safely roll back failed DLPAR cpu add dlpar_online_cpu() attempts to online all threads of a core that has been added to an LPAR. If onlining a non-primary thread fails (e.g. due to an allocation failure), the core is left with at least one thread online. dlpar_cpu_add() attempts to roll back the whole operation, releasing the core back to the platform. 
However, since some threads of the core being removed are still online, the BUG_ON(cpu_online(cpu)) in pseries_remove_processor() strikes: LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 3 PID: 8587 Comm: drmgr Not tainted 5.3.0-rc2-00190-g9b123d1ea237-dirty #46 NIP: c0000000000eeb2c LR: c0000000000eeac4 CTR: c0000000000ee9e0 REGS: c0000001f745b6c0 TRAP: 0700 Not tainted (5.3.0-rc2-00190-g9b123d1ea237-dirty) MSR: 800000010282b033 CR: 44002448 XER: 00000000 CFAR: c00000000195d718 IRQMASK: 0 GPR00: c0000000000eeac4 c0000001f745b950 c0000000032f6200 0000000000000008 GPR04: 0000000000000008 c000000003349c78 0000000000000040 00000000000001ff GPR08: 0000000000000008 0000000000000000 0000000000000001 0007ffffffffffff GPR12: 0000000084002844 c00000001ecacb80 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000008 GPR24: c000000003349ee0 c00000000334a2e4 c0000000fca4d7a8 c000000001d20048 GPR28: 0000000000000001 ffffffffffffffff ffffffffffffffff c0000000fca4d7c4 NIP [c0000000000eeb2c] pseries_smp_notifier+0x14c/0x2e0 LR [c0000000000eeac4] pseries_smp_notifier+0xe4/0x2e0 Call Trace: [c0000001f745b950] [c0000000000eeac4] pseries_smp_notifier+0xe4/0x2e0 (unreliable) [c0000001f745ba10] [c0000000001ac774] notifier_call_chain+0xb4/0x190 [c0000001f745bab0] [c0000000001ad62c] blocking_notifier_call_chain+0x7c/0xb0 [c0000001f745baf0] [c00000000167bda0] of_detach_node+0xc0/0x110 [c0000001f745bb50] [c0000000000e7ae4] dlpar_detach_node+0x64/0xa0 [c0000001f745bb80] [c0000000000edefc] dlpar_cpu_add+0x31c/0x360 [c0000001f745bc10] [c0000000000ee980] dlpar_cpu_probe+0x50/0xb0 [c0000001f745bc50] [c00000000002cf70] arch_cpu_probe+0x40/0x70 [c0000001f745bc70] [c000000000ccd808] cpu_probe_store+0x48/0x80 [c0000001f745bcb0] [c000000000cbcef8] dev_attr_store+0x38/0x60 [c0000001f745bcd0] [c00000000059c980] sysfs_kf_write+0x70/0xb0 [c0000001f745bd10] [c00000000059afb8] kernfs_fop_write+0xf8/0x280 [c0000001f745bd60] [c0000000004b437c] __vfs_write+0x3c/0x70 [c0000001f745bd80] [c0000000004b8710] vfs_write+0xd0/0x220 [c0000001f745bdd0] [c0000000004b8acc] ksys_write+0x7c/0x140 [c0000001f745be20] [c00000000000bbd8] system_call+0x5c/0x68 Move dlpar_offline_cpu() up in the file so that dlpar_online_cpu() can use it to re-offline any threads that have been onlined when an error is encountered. 
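The shape of the fix is a standard unwind-on-error loop. A minimal standalone sketch, where bring_up()/take_down() are invented stand-ins for the online/offline primitives:

#include <stdio.h>

static int bring_up(int unit)
{
	return unit == 2 ? -1 : 0;	/* simulate a failure on unit 2 */
}

static void take_down(int unit)
{
	printf("rolled back unit %d\n", unit);
}

static int bring_up_all(int nunits)
{
	int i, rc = 0;

	for (i = 0; i < nunits; i++) {
		rc = bring_up(i);
		if (rc) {
			/* Undo the units already brought up, newest first. */
			while (--i >= 0)
				take_down(i);
			break;
		}
	}
	return rc;
}

int main(void)
{
	return bring_up_all(4) ? 1 : 0;
}

The patch establishes the same invariant by calling dlpar_offline_cpu() from the error path of dlpar_online_cpu(), so a caller never observes a half-onlined core.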
Signed-off-by: Nathan Lynch Fixes: e666ae0b10aa ("powerpc/pseries: Update CPU hotplug error recovery") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191016183611.10867-3-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 116 ++++++++++--------- 1 file changed, 59 insertions(+), 57 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index f3f54ef645e124..8ab24bd7f89c65 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -338,6 +338,62 @@ static void pseries_remove_processor(struct device_node *np) cpu_maps_update_done(); } +static int dlpar_offline_cpu(struct device_node *dn) +{ + int rc = 0; + unsigned int cpu; + int len, nthreads, i; + const __be32 *intserv; + u32 thread; + + intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return -EINVAL; + + nthreads = len / sizeof(u32); + + cpu_maps_update_begin(); + for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); + for_each_present_cpu(cpu) { + if (get_hard_smp_processor_id(cpu) != thread) + continue; + + if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE) + break; + + if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) { + set_preferred_offline_state(cpu, + CPU_STATE_OFFLINE); + cpu_maps_update_done(); + timed_topology_update(1); + rc = device_offline(get_cpu_device(cpu)); + if (rc) + goto out; + cpu_maps_update_begin(); + break; + } + + /* + * The cpu is in CPU_STATE_INACTIVE. + * Upgrade it's state to CPU_STATE_OFFLINE. + */ + set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); + WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS); + __cpu_die(cpu); + break; + } + if (cpu == num_possible_cpus()) { + pr_warn("Could not find cpu to offline with physical id 0x%x\n", + thread); + } + } + cpu_maps_update_done(); + +out: + return rc; +} + static int dlpar_online_cpu(struct device_node *dn) { int rc = 0; @@ -364,8 +420,10 @@ static int dlpar_online_cpu(struct device_node *dn) timed_topology_update(1); find_and_online_cpu_nid(cpu); rc = device_online(get_cpu_device(cpu)); - if (rc) + if (rc) { + dlpar_offline_cpu(dn); goto out; + } cpu_maps_update_begin(); break; @@ -505,62 +563,6 @@ static ssize_t dlpar_cpu_add(u32 drc_index) return rc; } -static int dlpar_offline_cpu(struct device_node *dn) -{ - int rc = 0; - unsigned int cpu; - int len, nthreads, i; - const __be32 *intserv; - u32 thread; - - intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); - if (!intserv) - return -EINVAL; - - nthreads = len / sizeof(u32); - - cpu_maps_update_begin(); - for (i = 0; i < nthreads; i++) { - thread = be32_to_cpu(intserv[i]); - for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != thread) - continue; - - if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE) - break; - - if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) { - set_preferred_offline_state(cpu, - CPU_STATE_OFFLINE); - cpu_maps_update_done(); - timed_topology_update(1); - rc = device_offline(get_cpu_device(cpu)); - if (rc) - goto out; - cpu_maps_update_begin(); - break; - } - - /* - * The cpu is in CPU_STATE_INACTIVE. - * Upgrade it's state to CPU_STATE_OFFLINE. 
- */ - set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); - WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS); - __cpu_die(cpu); - break; - } - if (cpu == num_possible_cpus()) { - pr_warn("Could not find cpu to offline with physical id 0x%x\n", - thread); - } - } - cpu_maps_update_done(); - -out: - return rc; -} - static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index) { int rc; From 8e6b6da91ac9b9ec5a925b6cb13f287a54bd547d Mon Sep 17 00:00:00 2001 From: Anthony Steinhauser Date: Tue, 29 Oct 2019 12:07:59 -0700 Subject: [PATCH 026/144] powerpc/security/book3s64: Report L1TF status in sysfs Some PowerPC CPUs are vulnerable to L1TF to the same extent as to Meltdown. It is also mitigated by flushing the L1D on privilege transition. Currently the sysfs gives a false negative on L1TF on CPUs that I verified to be vulnerable, a Power9 Talos II Boston 004e 1202, PowerNV T2P9D01. Signed-off-by: Anthony Steinhauser Signed-off-by: Michael Ellerman [mpe: Just have cpu_show_l1tf() call cpu_show_meltdown() directly] Link: https://lore.kernel.org/r/20191029190759.84821-1-asteinhauser@google.com --- arch/powerpc/kernel/security.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 7cfcb294b11ca7..ad7f4bf8447c62 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -167,6 +167,11 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha return sprintf(buf, "Vulnerable\n"); } + +ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_meltdown(dev, attr, buf); +} #endif ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) From a42d6ba8c5be5aa597d25dbc15e336a2eca40260 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Oct 2019 13:27:59 +0530 Subject: [PATCH 027/144] powerpc/mm/book3s64/radix: Remove unused code. mm_tlb_flush_nested change was added in the mmu gather tlb flush to handle the case of parallel pte invalidate happening with mmap_sem held in read mode. This fix was done by commit 02390f66bd23 ("powerpc/64s/radix: Fix MADV_[FREE|DONTNEED] TLB flush miss problem with THP") and the problem is explained in detail in commit 99baac21e458 ("mm: fix MADV_[FREE|DONTNEED] TLB flush miss problem") This was later updated by commit 7a30df49f63a ("mm: mmu_gather: remove __tlb_reset_range() for force flush") to do a full mm flush rather than a range flush. By commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap") we are also now allowing a page table free in mmap_sem read mode which means we should do a PWC flush too. Our current full mm flush imply a PWC flush. With all the above change the mm_tlb_flush_nested(mm) branch in radix__tlb_flush will never be taken because for the nested case we would have taken the if (tlb->fullmm) branch. This patch removes the unused code. Also, remove the gflush change in __radix__flush_tlb_range that was added to handle the range tlb flush code. We only check for THP there because hugetlb is flushed via a different code path where page size is explicitly specified. 
This is a partial revert of commit 02390f66bd23 ("powerpc/64s/radix: Fix MADV_[FREE|DONTNEED] TLB flush miss problem with THP") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191024075801.22434-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 66 +++------------------------- 1 file changed, 6 insertions(+), 60 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 67af871190c6dc..24d1f30556e0a1 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -832,8 +832,7 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; static inline void __radix__flush_tlb_range(struct mm_struct *mm, - unsigned long start, unsigned long end, - bool flush_all_sizes) + unsigned long start, unsigned long end) { unsigned long pid; @@ -879,26 +878,16 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, } } } else { - bool hflush = flush_all_sizes; - bool gflush = flush_all_sizes; + bool hflush = false; unsigned long hstart, hend; - unsigned long gstart, gend; - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) - hflush = true; - - if (hflush) { + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { hstart = (start + PMD_SIZE - 1) & PMD_MASK; hend = end & PMD_MASK; if (hstart == hend) hflush = false; - } - - if (gflush) { - gstart = (start + PUD_SIZE - 1) & PUD_MASK; - gend = end & PUD_MASK; - if (gstart == gend) - gflush = false; + else + hflush = true; } if (local) { @@ -907,9 +896,6 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, if (hflush) __tlbiel_va_range(hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M); - if (gflush) - __tlbiel_va_range(gstart, gend, pid, - PUD_SIZE, MMU_PAGE_1G); asm volatile("ptesync": : :"memory"); } else if (cputlb_use_tlbie()) { asm volatile("ptesync": : :"memory"); @@ -917,10 +903,6 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, if (hflush) __tlbie_va_range(hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M); - if (gflush) - __tlbie_va_range(gstart, gend, pid, - PUD_SIZE, MMU_PAGE_1G); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); } else { _tlbiel_va_range_multicast(mm, @@ -928,9 +910,6 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, if (hflush) _tlbiel_va_range_multicast(mm, hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false); - if (gflush) - _tlbiel_va_range_multicast(mm, - gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G, false); } } preempt_enable(); @@ -945,7 +924,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, return radix__flush_hugetlb_tlb_range(vma, start, end); #endif - __radix__flush_tlb_range(vma->vm_mm, start, end, false); + __radix__flush_tlb_range(vma->vm_mm, start, end); } EXPORT_SYMBOL(radix__flush_tlb_range); @@ -1023,39 +1002,6 @@ void radix__tlb_flush(struct mmu_gather *tlb) */ if (tlb->fullmm) { __flush_all_mm(mm, true); -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) - } else if (mm_tlb_flush_nested(mm)) { - /* - * If there is a concurrent invalidation that is clearing ptes, - * then it's possible this invalidation will miss one of those - * cleared ptes and miss flushing the TLB. If this invalidate - * returns before the other one flushes TLBs, that can result - * in it returning while there are still valid TLBs inside the - * range to be invalidated. 
- * - * See mm/memory.c:tlb_finish_mmu() for more details. - * - * The solution to this is ensure the entire range is always - * flushed here. The problem for powerpc is that the flushes - * are page size specific, so this "forced flush" would not - * do the right thing if there are a mix of page sizes in - * the range to be invalidated. So use __flush_tlb_range - * which invalidates all possible page sizes in the range. - * - * PWC flush probably is not be required because the core code - * shouldn't free page tables in this path, but accounting - * for the possibility makes us a bit more robust. - * - * need_flush_all is an uncommon case because page table - * teardown should be done with exclusive locks held (but - * after locks are dropped another invalidate could come - * in), it could be optimized further if necessary. - */ - if (!tlb->need_flush_all) - __radix__flush_tlb_range(mm, start, end, true); - else - radix__flush_all_mm(mm); -#endif } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { if (!tlb->need_flush_all) radix__flush_tlb_mm(mm); From 52162ec784fa05f3a4b1d8e84421279998be3773 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Oct 2019 13:28:00 +0530 Subject: [PATCH 028/144] powerpc/mm/book3s64/radix: Use freed_tables instead of need_flush_all With commit 22a61c3c4f13 ("asm-generic/tlb: Track freeing of page-table directories in struct mmu_gather") we now track whether we freed page table in mmu_gather. Use that to decide whether to flush Page Walk Cache. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191024075801.22434-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/pgalloc.h | 15 --------------- arch/powerpc/include/asm/book3s/64/tlbflush.h | 16 ---------------- arch/powerpc/mm/book3s64/radix_tlb.c | 11 +++-------- 3 files changed, 3 insertions(+), 39 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index d5a44912902f90..f6968c81102603 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -122,11 +122,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long address) { - /* - * By now all the pud entries should be none entries. So go - * ahead and flush the page walk cache - */ - flush_tlb_pgtable(tlb, address); pgtable_free_tlb(tlb, pud, PUD_INDEX); } @@ -143,11 +138,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long address) { - /* - * By now all the pud entries should be none entries. So go - * ahead and flush the page walk cache - */ - flush_tlb_pgtable(tlb, address); return pgtable_free_tlb(tlb, pmd, PMD_INDEX); } @@ -166,11 +156,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { - /* - * By now all the pud entries should be none entries. 
So go - * ahead and flush the page walk cache - */ - flush_tlb_pgtable(tlb, address); pgtable_free_tlb(tlb, table, PTE_INDEX); } diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 7aa8195b6cff86..dcb5c3839d2f7b 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -147,22 +147,6 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, flush_tlb_page(vma, address); } -/* - * flush the page walk cache for the address - */ -static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long address) -{ - /* - * Flush the page table walk cache on freeing a page table. We already - * have marked the upper/higher level page table entry none by now. - * So it is safe to flush PWC here. - */ - if (!radix_enabled()) - return; - - radix__flush_tlb_pwc(tlb, address); -} - extern bool tlbie_capable; extern bool tlbie_enabled; diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 24d1f30556e0a1..f9a4d5793f0315 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -732,18 +732,13 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) } preempt_enable(); } + void radix__flush_all_mm(struct mm_struct *mm) { __flush_all_mm(mm, false); } EXPORT_SYMBOL(radix__flush_all_mm); -void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) -{ - tlb->need_flush_all = 1; -} -EXPORT_SYMBOL(radix__flush_tlb_pwc); - void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize) { @@ -1003,12 +998,12 @@ void radix__tlb_flush(struct mmu_gather *tlb) if (tlb->fullmm) { __flush_all_mm(mm, true); } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { - if (!tlb->need_flush_all) + if (!tlb->freed_tables) radix__flush_tlb_mm(mm); else radix__flush_all_mm(mm); } else { - if (!tlb->need_flush_all) + if (!tlb->freed_tables) radix__flush_tlb_range_psize(mm, start, end, psize); else radix__flush_tlb_pwc_range_psize(mm, start, end, psize); From 864edb758c50c30787bb51f2e26c4be2b4937025 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Oct 2019 13:28:01 +0530 Subject: [PATCH 029/144] powerpc/mm/book3s64/radix: Flush the full mm even when need_flush_all is set With the previous patch, we should now not be using need_flush_all for powerpc. But then make sure we force a PID tlbie flush with RIC=2 if we ever find need_flush_all set. Also don't reset it after a mmu gather flush. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191024075801.22434-3-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 3 +-- include/asm-generic/tlb.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index f9a4d5793f0315..a95175c0972b7f 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -995,7 +995,7 @@ void radix__tlb_flush(struct mmu_gather *tlb) * that flushes the process table entry cache upon process teardown. * See the comment for radix in arch_exit_mmap(). 
*/ - if (tlb->fullmm) { + if (tlb->fullmm || tlb->need_flush_all) { __flush_all_mm(mm, true); } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { if (!tlb->freed_tables) @@ -1008,7 +1008,6 @@ void radix__tlb_flush(struct mmu_gather *tlb) else radix__flush_tlb_pwc_range_psize(mm, start, end, psize); } - tlb->need_flush_all = 0; } static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 04c0644006fda0..e64991142a8b66 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -428,7 +428,7 @@ static inline void tlb_change_page_size(struct mmu_gather *tlb, { #ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE if (tlb->page_size && tlb->page_size != page_size) { - if (!tlb->fullmm) + if (!tlb->fullmm && !tlb->need_flush_all) tlb_flush_mmu(tlb); } From 16f6b67cf03cb43db7104acb2ca877bdc2606c92 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 1 Oct 2019 14:16:56 +0530 Subject: [PATCH 030/144] powerpc/book3s64/hash: Add cond_resched to avoid soft lockup warning With large memory (8TB and more) hotplug, we can get soft lockup warnings as below. These were caused by a long loop without any explicit cond_resched which is a problem for !PREEMPT kernels. Avoid this using cond_resched() while inserting hash page table entries. We already do similar cond_resched() in __add_pages(), see commit f64ac5e6e306 ("mm, memory_hotplug: add scheduling point to __add_pages"). rcu: 3-....: (24002 ticks this GP) idle=13e/1/0x4000000000000002 softirq=722/722 fqs=12001 (t=24003 jiffies g=4285 q=2002) NMI backtrace for cpu 3 CPU: 3 PID: 3870 Comm: ndctl Not tainted 5.3.0-197.18-default+ #2 Call Trace: dump_stack+0xb0/0xf4 (unreliable) nmi_cpu_backtrace+0x124/0x130 nmi_trigger_cpumask_backtrace+0x1ac/0x1f0 arch_trigger_cpumask_backtrace+0x28/0x3c rcu_dump_cpu_stacks+0xf8/0x154 rcu_sched_clock_irq+0x878/0xb40 update_process_times+0x48/0x90 tick_sched_handle.isra.16+0x4c/0x80 tick_sched_timer+0x68/0xe0 __hrtimer_run_queues+0x180/0x430 hrtimer_interrupt+0x110/0x300 timer_interrupt+0x108/0x2f0 decrementer_common+0x114/0x120 --- interrupt: 901 at arch_add_memory+0xc0/0x130 LR = arch_add_memory+0x74/0x130 memremap_pages+0x494/0x650 devm_memremap_pages+0x3c/0xa0 pmem_attach_disk+0x188/0x750 nvdimm_bus_probe+0xac/0x2c0 really_probe+0x148/0x570 driver_probe_device+0x19c/0x1d0 device_driver_attach+0xcc/0x100 bind_store+0x134/0x1c0 drv_attr_store+0x44/0x60 sysfs_kf_write+0x64/0x90 kernfs_fop_write+0x1a0/0x270 __vfs_write+0x3c/0x70 vfs_write+0xd0/0x260 ksys_write+0xdc/0x130 system_call+0x5c/0x68 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191001084656.31277-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/hash_utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index a9d1f72de8484f..b30435c7d8042a 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -316,6 +316,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, if (ret < 0) break; + cond_resched(); #ifdef CONFIG_DEBUG_PAGEALLOC if (debug_pagealloc_enabled() && (paddr >> PAGE_SHIFT) < linear_map_hash_count) From 3b05a1e517e1a8cfda4866ec31d28b2bc4fee4c4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 21 Oct 2019 16:23:09 +0200 Subject: [PATCH 031/144] powerpc/security: Fix debugfs data leak on 32-bit "powerpc_security_features" is "unsigned long", 
i.e. 32-bit or 64-bit, depending on the platform (PPC_FSL_BOOK3E or PPC_BOOK3S_64). Hence casting its address to "u64 *", and calling debugfs_create_x64() is wrong, and leaks 32-bit of nearby data to userspace on 32-bit platforms. While all currently defined SEC_FTR_* security feature flags fit in 32-bit, they all have "ULL" suffixes to make them 64-bit constants. Hence fix the leak by changing the type of "powerpc_security_features" (and the parameter types of its accessors) to "u64". This also allows to drop the cast. Fixes: 398af571128fe75f ("powerpc/security: Show powerpc_security_features in debugfs") Signed-off-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191021142309.28105-1-geert+renesas@glider.be --- arch/powerpc/include/asm/security_features.h | 8 ++++---- arch/powerpc/kernel/security.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 759597bf0fd867..a3e8ecd48dc7ff 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -9,7 +9,7 @@ #define _ASM_POWERPC_SECURITY_FEATURES_H -extern unsigned long powerpc_security_features; +extern u64 powerpc_security_features; extern bool rfi_flush; /* These are bit flags */ @@ -24,17 +24,17 @@ void setup_stf_barrier(void); void do_stf_barrier_fixups(enum stf_barrier_type types); void setup_count_cache_flush(void); -static inline void security_ftr_set(unsigned long feature) +static inline void security_ftr_set(u64 feature) { powerpc_security_features |= feature; } -static inline void security_ftr_clear(unsigned long feature) +static inline void security_ftr_clear(u64 feature) { powerpc_security_features &= ~feature; } -static inline bool security_ftr_enabled(unsigned long feature) +static inline bool security_ftr_enabled(u64 feature) { return !!(powerpc_security_features & feature); } diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index ad7f4bf8447c62..a3021e6faed865 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -16,7 +16,7 @@ #include -unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; +u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; enum count_cache_flush_type { COUNT_CACHE_FLUSH_NONE = 0x1, @@ -108,7 +108,7 @@ device_initcall(barrier_nospec_debugfs_init); static __init int security_feature_debugfs_init(void) { debugfs_create_x64("security_features", 0400, powerpc_debugfs_root, - (u64 *)&powerpc_security_features); + &powerpc_security_features); return 0; } device_initcall(security_feature_debugfs_init); From 3775026a654c15c92c8ac2d53f3fd14fdd1980df Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 2 Nov 2018 22:17:06 +0100 Subject: [PATCH 032/144] macintosh: ans-lcd: make anslcd_logo static and __initconst This variable has no reason to have external linkage, and since it is only used in an __init function, it might as well be made __initconst also. 
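The linkage half of the change is ordinary C. A tiny standalone illustration (invented symbol; __initconst itself is kernel-only, so it is only described in the comment):

#include <stdio.h>

/*
 * "static" gives the array internal linkage: no other translation unit
 * can reference it, so nothing forces the linker to keep it visible.
 * In the kernel, __initconst additionally places the data in a section
 * that is discarded once boot-time initialisation has finished.
 */
static const char banner[] = "demo banner";

int main(void)
{
	puts(banner);
	return 0;
}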
Signed-off-by: Rasmus Villemoes Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20181102211707.10229-1-linux@rasmusvillemoes.dk --- drivers/macintosh/ans-lcd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/macintosh/ans-lcd.c b/drivers/macintosh/ans-lcd.c index 400960cf04d53d..b1314d104b062e 100644 --- a/drivers/macintosh/ans-lcd.c +++ b/drivers/macintosh/ans-lcd.c @@ -147,7 +147,8 @@ static struct miscdevice anslcd_dev = { &anslcd_fops }; -const char anslcd_logo[] = "********************" /* Line #1 */ +static const char anslcd_logo[] __initconst = + "********************" /* Line #1 */ "* LINUX! *" /* Line #3 */ "* Welcome to *" /* Line #2 */ "********************"; /* Line #4 */ From 6266a4dadb1d0976490fdf5af4f7941e36f64e80 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 6 Nov 2019 13:30:25 +1100 Subject: [PATCH 033/144] powerpc/64s: Always disable branch profiling for prom_init.o Otherwise the build fails because prom_init is calling symbols it's not allowed to, eg: Error: External symbol 'ftrace_likely_update' referenced from prom_init.c make[3]: *** [arch/powerpc/kernel/Makefile:197: arch/powerpc/kernel/prom_init_check] Error 1 Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191106051129.7626-1-mpe@ellerman.id.au --- arch/powerpc/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 4fdd8a3e775b7c..717b52b65d6215 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -22,6 +22,7 @@ CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector) +CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code @@ -39,7 +40,6 @@ KASAN_SANITIZE_btext.o := n ifdef CONFIG_KASAN CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING -CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING endif From d79fbb3a32f05a7e1cc0294b86dacdb9cc3ad7f5 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Fri, 2 Aug 2019 10:50:06 +1200 Subject: [PATCH 034/144] powerpc: Support CMDLINE_EXTEND Bring powerpc in line with other architectures that support extending or overriding the bootloader provided command line. The current behaviour is most like CMDLINE_FROM_BOOTLOADER where the bootloader command line is preferred but the kernel config can provide a fallback so CMDLINE_FROM_BOOTLOADER is the default. CMDLINE_EXTEND can be used to append the CMDLINE from the kernel config to the one provided by the bootloader. Signed-off-by: Chris Packham Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190801225006.21952-1-chris.packham@alliedtelesis.co.nz --- arch/powerpc/Kconfig | 20 +++++++++++++++++- arch/powerpc/kernel/prom_init.c | 36 ++++++++++++++++++++++----------- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3e56c9c2f16eed..51a975c3d79b0a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -874,15 +874,33 @@ config CMDLINE some command-line options at build time by entering them here. In most cases you will need to specify the root device here. 
+choice + prompt "Kernel command line type" if CMDLINE != "" + default CMDLINE_FROM_BOOTLOADER + +config CMDLINE_FROM_BOOTLOADER + bool "Use bootloader kernel arguments if available" + help + Uses the command-line options passed by the boot loader. If + the boot loader doesn't provide any, the default kernel command + string provided in CMDLINE will be used. + +config CMDLINE_EXTEND + bool "Extend bootloader kernel arguments" + help + The command-line arguments provided by the boot loader will be + appended to the default kernel command string. + config CMDLINE_FORCE bool "Always use the default kernel command string" - depends on CMDLINE_BOOL help Always use the default kernel command string, even if the boot loader passes other arguments to the kernel. This is useful if you cannot or don't want to change the command-line options your boot loader passes to the kernel. +endchoice + config EXTRA_TARGETS string "Additional default image types" help diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 100f1b57ec2fd1..90987db10974c3 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -303,16 +303,24 @@ static char __init *prom_strstr(const char *s1, const char *s2) return NULL; } -static size_t __init prom_strlcpy(char *dest, const char *src, size_t size) -{ - size_t ret = prom_strlen(src); +static size_t __init prom_strlcat(char *dest, const char *src, size_t count) +{ + size_t dsize = prom_strlen(dest); + size_t len = prom_strlen(src); + size_t res = dsize + len; + + /* This would be a bug */ + if (dsize >= count) + return count; + + dest += dsize; + count -= dsize; + if (len >= count) + len = count-1; + memcpy(dest, src, len); + dest[len] = 0; + return res; - if (size) { - size_t len = (ret >= size) ? size - 1 : ret; - memcpy(dest, src, len); - dest[len] = '\0'; - } - return ret; } #ifdef CONFIG_PPC_PSERIES @@ -764,10 +772,14 @@ static void __init early_cmdline_parse(void) prom_cmd_line[0] = 0; p = prom_cmd_line; - if ((long)prom.chosen > 0) + + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && (long)prom.chosen > 0) l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1); - if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && (l <= 0 || p[0] == '\0')) /* dbl check */ - prom_strlcpy(prom_cmd_line, CONFIG_CMDLINE, sizeof(prom_cmd_line)); + + if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || l <= 0 || p[0] == '\0') + prom_strlcat(prom_cmd_line, " " CONFIG_CMDLINE, + sizeof(prom_cmd_line)); + prom_printf("command line: %s\n", prom_cmd_line); #ifdef CONFIG_PPC64 From 29430fae82073d39b1b881a3cd507416a56a363f Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Mon, 4 Nov 2019 13:32:53 +1100 Subject: [PATCH 035/144] powerpc: Allow flush_icache_range to work across ranges >4GB When calling flush_icache_range with a size >4GB, we were masking off the upper 32 bits, so we would incorrectly flush a range smaller than intended. This patch replaces the 32 bit shifts with 64 bit ones, so that the full size is accounted for. 
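The truncation is easy to reproduce in plain C. An illustrative comparison with invented values, where the 32-bit shift only ever sees the low word of the length, roughly what srw did to the line count:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t len = 6ULL << 30;	/* a 6 GiB flush request */
	unsigned int shift = 7;		/* log2 of a 128-byte cache block */

	uint32_t lines32 = (uint32_t)len >> shift;	/* srw-like: truncated */
	uint64_t lines64 = len >> shift;		/* srd-like: full width */

	printf("32-bit: %u lines, 64-bit: %llu lines\n",
	       lines32, (unsigned long long)lines64);
	return 0;
}

For the 6 GiB range the 32-bit computation covers only 2 GiB worth of cache lines, which is exactly the under-flush described above.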
Signed-off-by: Alastair D'Silva Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191104023305.9581-2-alastair@au1.ibm.com --- arch/powerpc/kernel/misc_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index b55a7b4cb543fd..9bc0aa9aeb6543 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -82,7 +82,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */ - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 1: dcbst 0,r6 @@ -98,7 +98,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */ - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 2: icbi 0,r6 From f9ec11165301982585e5e5f606739b5bae5331f3 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Mon, 4 Nov 2019 13:32:54 +1100 Subject: [PATCH 036/144] powerpc: Allow 64bit VDSO __kernel_sync_dicache to work across ranges >4GB When calling __kernel_sync_dicache with a size >4GB, we were masking off the upper 32 bits, so we would incorrectly flush a range smaller than intended. This patch replaces the 32 bit shifts with 64 bit ones, so that the full size is accounted for. Signed-off-by: Alastair D'Silva Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191104023305.9581-3-alastair@au1.ibm.com --- arch/powerpc/kernel/vdso64/cacheflush.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index 3f92561a64c4d4..526f5ba2593e22 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -35,7 +35,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ mtctr r8 @@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ mtctr r8 From 7a0745c5e03ff1129864bc6d80f5c4417e8d7893 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Mon, 4 Nov 2019 13:32:55 +1100 Subject: [PATCH 037/144] powerpc: define helpers to get L1 icache sizes This patch adds helpers to retrieve icache sizes, and renames the existing helpers to make it clear that they are for dcache. 
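The renaming matters because the two block sizes can differ, and each flush loop must stride by its own. A hedged sketch with invented geometry (the real values come from ppc64_caches):

#include <stdio.h>

static void walk_range(unsigned long start, unsigned long stop,
		       unsigned long bytes, const char *op)
{
	unsigned long addr = start & ~(bytes - 1);

	for (; addr < stop; addr += bytes)
		printf("%s %#lx\n", op, addr);
}

int main(void)
{
	/* Invented geometry: 128-byte d-cache blocks, 32-byte i-cache blocks. */
	walk_range(0x1000, 0x1100, 128, "dcbst");	/* clean d-cache */
	walk_range(0x1000, 0x1100, 32, "icbi");		/* invalidate i-cache */
	return 0;
}

Striding the icbi loop by the d-cache block size would touch only one in four i-cache blocks in this geometry, which is the kind of mistake the l1_dcache_*/l1_icache_* split makes hard to write.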
Signed-off-by: Alastair D'Silva Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191104023305.9581-4-alastair@au1.ibm.com --- arch/powerpc/include/asm/cache.h | 29 +++++++++++++++++++++++---- arch/powerpc/include/asm/cacheflush.h | 12 +++++------ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 45e3137ccd71c2..afb88754e0e076 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -55,25 +55,46 @@ struct ppc64_caches { extern struct ppc64_caches ppc64_caches; -static inline u32 l1_cache_shift(void) +static inline u32 l1_dcache_shift(void) { return ppc64_caches.l1d.log_block_size; } -static inline u32 l1_cache_bytes(void) +static inline u32 l1_dcache_bytes(void) { return ppc64_caches.l1d.block_size; } + +static inline u32 l1_icache_shift(void) +{ + return ppc64_caches.l1i.log_block_size; +} + +static inline u32 l1_icache_bytes(void) +{ + return ppc64_caches.l1i.block_size; +} #else -static inline u32 l1_cache_shift(void) +static inline u32 l1_dcache_shift(void) { return L1_CACHE_SHIFT; } -static inline u32 l1_cache_bytes(void) +static inline u32 l1_dcache_bytes(void) { return L1_CACHE_BYTES; } + +static inline u32 l1_icache_shift(void) +{ + return L1_CACHE_SHIFT; +} + +static inline u32 l1_icache_bytes(void) +{ + return L1_CACHE_BYTES; +} + #endif #endif /* ! __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index eef388f2659f4f..ed57843ef4524a 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -63,8 +63,8 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr) */ static inline void flush_dcache_range(unsigned long start, unsigned long stop) { - unsigned long shift = l1_cache_shift(); - unsigned long bytes = l1_cache_bytes(); + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); void *addr = (void *)(start & ~(bytes - 1)); unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; @@ -89,8 +89,8 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop) */ static inline void clean_dcache_range(unsigned long start, unsigned long stop) { - unsigned long shift = l1_cache_shift(); - unsigned long bytes = l1_cache_bytes(); + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); void *addr = (void *)(start & ~(bytes - 1)); unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; @@ -108,8 +108,8 @@ static inline void clean_dcache_range(unsigned long start, unsigned long stop) static inline void invalidate_dcache_range(unsigned long start, unsigned long stop) { - unsigned long shift = l1_cache_shift(); - unsigned long bytes = l1_cache_bytes(); + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); void *addr = (void *)(start & ~(bytes - 1)); unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; From 23eb7f560a2a6a1b0dbaaaae8685da75314347e4 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Mon, 4 Nov 2019 13:32:56 +1100 Subject: [PATCH 038/144] powerpc: Convert flush_icache_range & friends to C Similar to commit 22e9c88d486a ("powerpc/64: reuse PPC32 static inline flush_dcache_range()") this patch converts the following ASM symbols to C: flush_icache_range() __flush_dcache_icache() __flush_dcache_icache_phys() This was done as we discovered a long-standing 
bug where the length of the range was truncated due to using a 32 bit shift instead of a 64 bit one. By converting these functions to C, it becomes easier to maintain. flush_dcache_icache_phys() retains a critical assembler section as we must ensure there are no memory accesses while the data MMU is disabled (authored by Christophe Leroy). Since this has no external callers, it has also been made static, allowing the compiler to inline it within flush_dcache_icache_page(). Signed-off-by: Alastair D'Silva Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman [mpe: Minor fixups, don't export __flush_dcache_icache()] Link: https://lore.kernel.org/r/20191104023305.9581-5-alastair@au1.ibm.com --- arch/powerpc/include/asm/cache.h | 26 ++--- arch/powerpc/include/asm/cacheflush.h | 24 ++--- arch/powerpc/kernel/misc_32.S | 120 --------------------- arch/powerpc/kernel/misc_64.S | 102 ------------------ arch/powerpc/mm/mem.c | 150 +++++++++++++++++++++++++- 5 files changed, 170 insertions(+), 252 deletions(-) diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index afb88754e0e076..72b81015cebe9a 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -96,22 +96,7 @@ static inline u32 l1_icache_bytes(void) } #endif -#endif /* ! __ASSEMBLY__ */ - -#if defined(__ASSEMBLY__) -/* - * For a snooping icache, we still need a dummy icbi to purge all the - * prefetched instructions from the ifetch buffers. We also need a sync - * before the icbi to order the the actual stores to memory that might - * have modified instructions with the icbi. - */ -#define PURGE_PREFETCHED_INS \ - sync; \ - icbi 0,r3; \ - sync; \ - isync -#else #define __read_mostly __attribute__((__section__(".data..read_mostly"))) #ifdef CONFIG_PPC_BOOK3S_32 @@ -145,6 +130,17 @@ static inline void dcbst(void *addr) { __asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory"); } + +static inline void icbi(void *addr) +{ + asm volatile ("icbi 0, %0" : : "r"(addr) : "memory"); +} + +static inline void iccci(void *addr) +{ + asm volatile ("iccci 0, %0" : : "r"(addr) : "memory"); +} + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_CACHE_H */ diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index ed57843ef4524a..4a1c9f0200e1bf 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -42,24 +42,20 @@ extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) -extern void flush_icache_range(unsigned long, unsigned long); +void flush_icache_range(unsigned long start, unsigned long stop); extern void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, unsigned long addr, int len); -extern void __flush_dcache_icache(void *page_va); extern void flush_dcache_icache_page(struct page *page); -#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE) -extern void __flush_dcache_icache_phys(unsigned long physaddr); -#else -static inline void __flush_dcache_icache_phys(unsigned long physaddr) -{ - BUG(); -} -#endif - -/* - * Write any modified data cache blocks out to memory and invalidate them. - * Does not invalidate the corresponding instruction cache blocks. +void __flush_dcache_icache(void *page); + +/** + * flush_dcache_range(): Write any modified data cache blocks out to memory and + * invalidate them. 
Does not invalidate the corresponding instruction cache + * blocks. + * + * @start: the start address + * @stop: the stop address (exclusive) */ static inline void flush_dcache_range(unsigned long start, unsigned long stop) { diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 82df4b09e79f49..f4e4a1926a7a5b 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -316,126 +316,6 @@ _GLOBAL(flush_instruction_cache) EXPORT_SYMBOL(flush_instruction_cache) #endif /* CONFIG_PPC_8xx */ -/* - * Write any modified data cache blocks out to memory - * and invalidate the corresponding instruction cache blocks. - * This is a no-op on the 601. - * - * flush_icache_range(unsigned long start, unsigned long stop) - */ -_GLOBAL(flush_icache_range) -#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200) - PURGE_PREFETCHED_INS - blr /* for 601 and e200, do nothing */ -#else - rlwinm r3,r3,0,0,31 - L1_CACHE_SHIFT - subf r4,r3,r4 - addi r4,r4,L1_CACHE_BYTES - 1 - srwi. r4,r4,L1_CACHE_SHIFT - beqlr - mtctr r4 - mr r6,r3 -1: dcbst 0,r3 - addi r3,r3,L1_CACHE_BYTES - bdnz 1b - sync /* wait for dcbst's to get to ram */ -#ifndef CONFIG_44x - mtctr r4 -2: icbi 0,r6 - addi r6,r6,L1_CACHE_BYTES - bdnz 2b -#else - /* Flash invalidate on 44x because we are passed kmapped addresses and - this doesn't work for userspace pages due to the virtually tagged - icache. Sigh. */ - iccci 0, r0 -#endif - sync /* additional sync needed on g4 */ - isync - blr -#endif -_ASM_NOKPROBE_SYMBOL(flush_icache_range) -EXPORT_SYMBOL(flush_icache_range) - -/* - * Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. - * This is a no-op on the 601 and e200 which have a unified cache. - * - * void __flush_dcache_icache(void *page) - */ -_GLOBAL(__flush_dcache_icache) -#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200) - PURGE_PREFETCHED_INS - blr -#else - rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ - li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ - mtctr r4 - mr r6,r3 -0: dcbst 0,r3 /* Write line to ram */ - addi r3,r3,L1_CACHE_BYTES - bdnz 0b - sync -#ifdef CONFIG_44x - /* We don't flush the icache on 44x. Those have a virtual icache - * and we don't have access to the virtual address here (it's - * not the page vaddr but where it's mapped in user space). The - * flushing of the icache on these is handled elsewhere, when - * a change in the address space occurs, before returning to - * user space - */ -BEGIN_MMU_FTR_SECTION - blr -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x) -#endif /* CONFIG_44x */ - mtctr r4 -1: icbi 0,r6 - addi r6,r6,L1_CACHE_BYTES - bdnz 1b - sync - isync - blr -#endif - -#ifndef CONFIG_BOOKE -/* - * Flush a particular page from the data cache to RAM, identified - * by its physical address. We turn off the MMU so we can just use - * the physical address (this may be a highmem page without a kernel - * mapping). 
- * - * void __flush_dcache_icache_phys(unsigned long physaddr) - */ -_GLOBAL(__flush_dcache_icache_phys) -#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200) - PURGE_PREFETCHED_INS - blr /* for 601 and e200, do nothing */ -#else - mfmsr r10 - rlwinm r0,r10,0,28,26 /* clear DR */ - mtmsr r0 - isync - rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ - li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ - mtctr r4 - mr r6,r3 -0: dcbst 0,r3 /* Write line to ram */ - addi r3,r3,L1_CACHE_BYTES - bdnz 0b - sync - mtctr r4 -1: icbi 0,r6 - addi r6,r6,L1_CACHE_BYTES - bdnz 1b - sync - mtmsr r10 /* restore DR */ - isync - blr -#endif -#endif /* CONFIG_BOOKE */ - /* * Copy a whole page. We use the dcbz instruction on the destination * to reduce memory traffic (it eliminates the unnecessary reads of diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 9bc0aa9aeb6543..ff20c253f2737a 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -49,108 +49,6 @@ _GLOBAL(call_do_irq) mtlr r0 blr - .section ".toc","aw" -PPC64_CACHES: - .tc ppc64_caches[TC],ppc64_caches - .section ".text" - -/* - * Write any modified data cache blocks out to memory - * and invalidate the corresponding instruction cache blocks. - * - * flush_icache_range(unsigned long start, unsigned long stop) - * - * flush all bytes from start through stop-1 inclusive - */ - -_GLOBAL_TOC(flush_icache_range) -BEGIN_FTR_SECTION - PURGE_PREFETCHED_INS - blr -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) -/* - * Flush the data cache to memory - * - * Different systems have different cache line sizes - * and in some cases i-cache and d-cache line sizes differ from - * each other. - */ - ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1BLOCKSIZE(r10)/* Get cache block size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */ - srd. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - mtctr r8 -1: dcbst 0,r6 - add r6,r6,r7 - bdnz 1b - sync - -/* Now invalidate the instruction cache */ - - lwz r7,ICACHEL1BLOCKSIZE(r10) /* Get Icache block size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 - lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */ - srd. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - mtctr r8 -2: icbi 0,r6 - add r6,r6,r7 - bdnz 2b - isync - blr -_ASM_NOKPROBE_SYMBOL(flush_icache_range) -EXPORT_SYMBOL(flush_icache_range) - -/* - * Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. 
- * - * void __flush_dcache_icache(void *page) - */ -_GLOBAL(__flush_dcache_icache) -/* - * Flush the data cache to memory - * - * Different systems have different cache line sizes - */ - -BEGIN_FTR_SECTION - PURGE_PREFETCHED_INS - blr -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) - -/* Flush the dcache */ - ld r7,PPC64_CACHES@toc(r2) - clrrdi r3,r3,PAGE_SHIFT /* Page align */ - lwz r4,DCACHEL1BLOCKSPERPAGE(r7) /* Get # dcache blocks per page */ - lwz r5,DCACHEL1BLOCKSIZE(r7) /* Get dcache block size */ - mr r6,r3 - mtctr r4 -0: dcbst 0,r6 - add r6,r6,r5 - bdnz 0b - sync - -/* Now invalidate the icache */ - - lwz r4,ICACHEL1BLOCKSPERPAGE(r7) /* Get # icache blocks per page */ - lwz r5,ICACHEL1BLOCKSIZE(r7) /* Get icache block size */ - mtctr r4 -1: icbi 0,r3 - add r3,r3,r5 - bdnz 1b - isync - blr - _GLOBAL(__bswapdi2) EXPORT_SYMBOL(__bswapdi2) srdi r8,r3,32 diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index be941d382c8d10..3392cacabe6068 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -318,6 +318,120 @@ void free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } +/** + * flush_coherent_icache() - if a CPU has a coherent icache, flush it + * @addr: The base address to use (can be any valid address, the whole cache will be flushed) + * Return true if the cache was flushed, false otherwise + */ +static inline bool flush_coherent_icache(unsigned long addr) +{ + /* + * For a snooping icache, we still need a dummy icbi to purge all the + * prefetched instructions from the ifetch buffers. We also need a sync + * before the icbi to order the the actual stores to memory that might + * have modified instructions with the icbi. + */ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { + mb(); /* sync */ + icbi((void *)addr); + mb(); /* sync */ + isync(); + return true; + } + + return false; +} + +/** + * invalidate_icache_range() - Flush the icache by issuing icbi across an address range + * @start: the start address + * @stop: the stop address (exclusive) + */ +static void invalidate_icache_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_icache_shift(); + unsigned long bytes = l1_icache_bytes(); + char *addr = (char *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + icbi(addr); + + mb(); /* sync */ + isync(); +} + +/** + * flush_icache_range: Write any modified data cache blocks out to memory + * and invalidate the corresponding blocks in the instruction cache + * + * Generic code will call this after writing memory, before executing from it. + * + * @start: the start address + * @stop: the stop address (exclusive) + */ +void flush_icache_range(unsigned long start, unsigned long stop) +{ + if (flush_coherent_icache(start)) + return; + + clean_dcache_range(start, stop); + + if (IS_ENABLED(CONFIG_44x)) { + /* + * Flash invalidate on 44x because we are passed kmapped + * addresses and this doesn't work for userspace pages due to + * the virtually tagged icache. 
+		 */
+		iccci((void *)start);
+		mb(); /* sync */
+		isync();
+	} else
+		invalidate_icache_range(start, stop);
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
+/**
+ * flush_dcache_icache_phys() - Flush a page by its physical address
+ * @physaddr: the physical address of the page
+ */
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+	unsigned long bytes = l1_dcache_bytes();
+	unsigned long nb = PAGE_SIZE / bytes;
+	unsigned long addr = physaddr & PAGE_MASK;
+	unsigned long msr, msr0;
+	unsigned long loop1 = addr, loop2 = addr;
+
+	msr0 = mfmsr();
+	msr = msr0 & ~MSR_DR;
+	/*
+	 * This must remain as ASM to prevent potential memory accesses
+	 * while the data MMU is disabled
+	 */
+	asm volatile(
+		"	mtctr %2;\n"
+		"	mtmsr %3;\n"
+		"	isync;\n"
+		"0:	dcbst	0, %0;\n"
+		"	addi	%0, %0, %4;\n"
+		"	bdnz	0b;\n"
+		"	sync;\n"
+		"	mtctr %2;\n"
+		"1:	icbi	0, %1;\n"
+		"	addi	%1, %1, %4;\n"
+		"	bdnz	1b;\n"
+		"	sync;\n"
+		"	mtmsr %5;\n"
+		"	isync;\n"
+		: "+&r" (loop1), "+&r" (loop2)
+		: "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
+		: "ctr", "memory");
+}
+#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
+
 /*
  * This is called when a page has been modified by the kernel.
  * It just marks the page as not i-cache clean. We do the i-cache
@@ -350,12 +464,46 @@ void flush_dcache_icache_page(struct page *page)
 		__flush_dcache_icache(start);
 		kunmap_atomic(start);
 	} else {
-		__flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
+		unsigned long addr = page_to_pfn(page) << PAGE_SHIFT;
+
+		if (flush_coherent_icache(addr))
+			return;
+		flush_dcache_icache_phys(addr);
 	}
 #endif
 }
 EXPORT_SYMBOL(flush_dcache_icache_page);
 
+/**
+ * __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ *
+ * @p: the address of the page to flush
+ */
+void __flush_dcache_icache(void *p)
+{
+	unsigned long addr = (unsigned long)p;
+
+	if (flush_coherent_icache(addr))
+		return;
+
+	clean_dcache_range(addr, addr + PAGE_SIZE);
+
+	/*
+	 * We don't flush the icache on 44x. Those have a virtual icache and we
+	 * don't have access to the virtual address here (it's not the page
+	 * vaddr but where it's mapped in user space). The flushing of the
+	 * icache on these is handled elsewhere, when a change in the address
+	 * space occurs, before returning to user space.
+	 */
+
+	if (cpu_has_feature(MMU_FTR_TYPE_44x))
+		return;
+
+	invalidate_icache_range(addr, addr + PAGE_SIZE);
+}
+
 void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
 {
 	clear_page(page);
From 076265907cf9633bbef861c7c2a1c26a8209f283 Mon Sep 17 00:00:00 2001
From: Alastair D'Silva
Date: Mon, 4 Nov 2019 13:32:57 +1100
Subject: [PATCH 039/144] powerpc: Chunk calls to flush_dcache_range in arch_*_memory

When presented with large amounts of memory being hotplugged (in my test
case, ~890GB), the call to flush_dcache_range takes a while (~50 seconds),
triggering RCU stalls.

This patch breaks up the call into 1GB chunks, calling cond_resched() in
between to allow the scheduler to run.
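As an illustration, the chunking boils down to a bounded loop that yields
between chunks; this is a minimal sketch of the pattern, with the actual
helper, flush_dcache_range_chunked(), added in the diff below:

	for (i = start; i < stop; i += chunk) {
		/* flush at most one chunk, then let pending work run */
		flush_dcache_range(i, min(stop, i + chunk));
		cond_resched();
	}

With 1GB chunks, flushing ~890GB reaches cond_resched() roughly 890 times
instead of never, which is what silences the RCU stall warnings.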
Fixes: fb5924fddf9e ("powerpc/mm: Flush cache on memory hot(un)plug")
Signed-off-by: Alastair D'Silva
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20191104023305.9581-6-alastair@au1.ibm.com
---
 arch/powerpc/mm/mem.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 3392cacabe6068..634e5ea55b6b37 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -104,6 +104,27 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
 	return -ENODEV;
 }
 
+#define FLUSH_CHUNK_SIZE SZ_1G
+/**
+ * flush_dcache_range_chunked(): Write any modified data cache blocks out to
+ * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE.
+ * Does not invalidate the corresponding instruction cache blocks.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ * @chunk: the max size of the chunks
+ */
+static void flush_dcache_range_chunked(unsigned long start, unsigned long stop,
+				       unsigned long chunk)
+{
+	unsigned long i;
+
+	for (i = start; i < stop; i += chunk) {
+		flush_dcache_range(i, min(stop, i + chunk));
+		cond_resched();
+	}
+}
+
 int __ref arch_add_memory(int nid, u64 start, u64 size,
 			  struct mhp_restrictions *restrictions)
 {
@@ -120,7 +141,8 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
 			start, start + size, rc);
 		return -EFAULT;
 	}
-	flush_dcache_range(start, start + size);
+
+	flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
 
 	return __add_pages(nid, start_pfn, nr_pages, restrictions);
 }
@@ -137,7 +159,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 
 	/* Remove htab bolted mappings for this section of memory */
 	start = (unsigned long)__va(start);
-	flush_dcache_range(start, start + size);
+	flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
+
 	ret = remove_section_mapping(start, start + size);
 	WARN_ON_ONCE(ret);
 
From ea458effa88e4f4739551d76fe3f702daf607995 Mon Sep 17 00:00:00 2001
From: Alastair D'Silva
Date: Mon, 4 Nov 2019 13:32:58 +1100
Subject: [PATCH 040/144] powerpc: Don't flush caches when adding memory

This operation takes a significant amount of time when hotplugging large
amounts of memory (~50 seconds with 890GB of persistent memory).

This was originally added in commit fb5924fddf9e ("powerpc/mm: Flush cache
on memory hot(un)plug") to support memtrace, but the flush on add is not
needed as it is flushed on remove.

Signed-off-by: Alastair D'Silva
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20191104023305.9581-7-alastair@au1.ibm.com
---
 arch/powerpc/mm/mem.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 634e5ea55b6b37..7573002077a67b 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -142,8 +142,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
 		return -EFAULT;
 	}
 
-	flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
-
 	return __add_pages(nid, start_pfn, nr_pages, restrictions);
 }
 
From 1a8916ee3ac29054322cdac687d36e1b5894d272 Mon Sep 17 00:00:00 2001
From: Nayna Jain
Date: Tue, 5 Nov 2019 17:00:22 -0600
Subject: [PATCH 041/144] powerpc: Detect the secure boot mode of the system

This patch defines a function to detect the secure boot state of a PowerNV
system.

The PPC_SECURE_BOOT config represents the base enablement of secure boot
for powerpc.
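As an illustration only (not part of this patch), a hypothetical consumer
would gate its behaviour on the detected state; example_init() is an
assumed name here:

	#include <asm/secure_boot.h>

	static int __init example_init(void)
	{
		/* react to the firmware-reported secure boot state */
		if (is_ppc_secureboot_enabled())
			pr_info("enforcing OS secure boot policies\n");
		return 0;
	}

The IMA patches later in this series use exactly this query to select
their arch-specific policy rules.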
Signed-off-by: Nayna Jain Signed-off-by: Eric Richter [mpe: Fold in change from Nayna to add "ibm,secureboot" to ids] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/46b003b9-3225-6bf7-9101-ed6580bb748c@linux.ibm.com --- arch/powerpc/Kconfig | 10 ++++++++ arch/powerpc/include/asm/secure_boot.h | 23 +++++++++++++++++ arch/powerpc/kernel/Makefile | 2 ++ arch/powerpc/kernel/secure_boot.c | 35 ++++++++++++++++++++++++++ 4 files changed, 70 insertions(+) create mode 100644 arch/powerpc/include/asm/secure_boot.h create mode 100644 arch/powerpc/kernel/secure_boot.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3e56c9c2f16eed..56ea0019b61677 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -934,6 +934,16 @@ config PPC_MEM_KEYS If unsure, say y. +config PPC_SECURE_BOOT + prompt "Enable secure boot support" + bool + depends on PPC_POWERNV + help + Systems with firmware secure boot enabled need to define security + policies to extend secure boot to the OS. This config allows a user + to enable OS secure boot on systems that have firmware support for + it. If in doubt say N. + endmenu config ISA_DMA_API diff --git a/arch/powerpc/include/asm/secure_boot.h b/arch/powerpc/include/asm/secure_boot.h new file mode 100644 index 00000000000000..07d0fe0ca81fe7 --- /dev/null +++ b/arch/powerpc/include/asm/secure_boot.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Secure boot definitions + * + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + */ +#ifndef _ASM_POWER_SECURE_BOOT_H +#define _ASM_POWER_SECURE_BOOT_H + +#ifdef CONFIG_PPC_SECURE_BOOT + +bool is_ppc_secureboot_enabled(void); + +#else + +static inline bool is_ppc_secureboot_enabled(void) +{ + return false; +} + +#endif +#endif diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index a7ca8fe623686a..e2a54fa240ac78 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -161,6 +161,8 @@ ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),) obj-y += ucall.o endif +obj-$(CONFIG_PPC_SECURE_BOOT) += secure_boot.o + # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_prom_init.o := n KCOV_INSTRUMENT_prom_init.o := n diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c new file mode 100644 index 00000000000000..583c2c4edaf03d --- /dev/null +++ b/arch/powerpc/kernel/secure_boot.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + */ +#include +#include +#include + +static struct device_node *get_ppc_fw_sb_node(void) +{ + static const struct of_device_id ids[] = { + { .compatible = "ibm,secureboot", }, + { .compatible = "ibm,secureboot-v1", }, + { .compatible = "ibm,secureboot-v2", }, + {}, + }; + + return of_find_matching_node(NULL, ids); +} + +bool is_ppc_secureboot_enabled(void) +{ + struct device_node *node; + bool enabled = false; + + node = get_ppc_fw_sb_node(); + enabled = of_property_read_bool(node, "os-secureboot-enforcing"); + + of_node_put(node); + + pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled"); + + return enabled; +} From 4238fad366a660cbc6499ca1ea4be42bd4d1ac5b Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Wed, 30 Oct 2019 23:31:27 -0400 Subject: [PATCH 042/144] powerpc/ima: Add support to initialize ima policy rules PowerNV systems use a Linux-based bootloader, which rely on the IMA subsystem to enforce different secure boot modes. 
Since the verification policy may differ based on the secure boot mode of the system, the policies must be defined at runtime. This patch implements arch-specific support to define IMA policy rules based on the runtime secure boot mode of the system. This patch provides arch-specific IMA policies if PPC_SECURE_BOOT config is enabled. Signed-off-by: Nayna Jain Signed-off-by: Mimi Zohar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1572492694-6520-3-git-send-email-zohar@linux.ibm.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/ima_arch.c | 43 ++++++++++++++++++++++++++++++++++ include/linux/ima.h | 3 ++- 4 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/kernel/ima_arch.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 56ea0019b61677..c795039bdc73b6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -938,6 +938,7 @@ config PPC_SECURE_BOOT prompt "Enable secure boot support" bool depends on PPC_POWERNV + depends on IMA_ARCH_POLICY help Systems with firmware secure boot enabled need to define security policies to extend secure boot to the OS. This config allows a user diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index e2a54fa240ac78..e8eb2955b7d5ed 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -161,7 +161,7 @@ ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),) obj-y += ucall.o endif -obj-$(CONFIG_PPC_SECURE_BOOT) += secure_boot.o +obj-$(CONFIG_PPC_SECURE_BOOT) += secure_boot.o ima_arch.o # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_prom_init.o := n diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c new file mode 100644 index 00000000000000..d88913dc0da7d9 --- /dev/null +++ b/arch/powerpc/kernel/ima_arch.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + */ + +#include +#include + +bool arch_ima_get_secureboot(void) +{ + return is_ppc_secureboot_enabled(); +} + +/* + * The "secure_rules" are enabled only on "secureboot" enabled systems. + * These rules verify the file signatures against known good values. + * The "appraise_type=imasig|modsig" option allows the known good signature + * to be stored as an xattr or as an appended signature. + * + * To avoid duplicate signature verification as much as possible, the IMA + * policy rule for module appraisal is added only if CONFIG_MODULE_SIG_FORCE + * is not enabled. + */ +static const char *const secure_rules[] = { + "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig", +#ifndef CONFIG_MODULE_SIG_FORCE + "appraise func=MODULE_CHECK appraise_type=imasig|modsig", +#endif + NULL +}; + +/* + * Returns the relevant IMA arch-specific policies based on the system secure + * boot state. 
+ */ +const char *const *arch_get_ima_policy(void) +{ + if (is_ppc_secureboot_enabled()) + return secure_rules; + + return NULL; +} diff --git a/include/linux/ima.h b/include/linux/ima.h index 1c37f17f72035b..6d904754d858bd 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -29,7 +29,8 @@ extern void ima_kexec_cmdline(const void *buf, int size); extern void ima_add_kexec_buffer(struct kimage *image); #endif -#if (defined(CONFIG_X86) && defined(CONFIG_EFI)) || defined(CONFIG_S390) +#if (defined(CONFIG_X86) && defined(CONFIG_EFI)) || defined(CONFIG_S390) \ + || defined(CONFIG_PPC_SECURE_BOOT) extern bool arch_ima_get_secureboot(void); extern const char * const *arch_get_ima_policy(void); #else From 2702809a4a1ab414d75c00936cda70ea77c8234e Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Tue, 5 Nov 2019 17:02:07 -0600 Subject: [PATCH 043/144] powerpc: Detect the trusted boot state of the system While secure boot permits only properly verified signed kernels to be booted, trusted boot calculates the file hash of the kernel image and stores the measurement prior to boot, that can be subsequently compared against good known values via attestation services. This patch reads the trusted boot state of a PowerNV system. The state is used to conditionally enable additional measurement rules in the IMA arch-specific policies. Signed-off-by: Nayna Jain Signed-off-by: Eric Richter Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e9eeee6b-b9bf-1e41-2954-61dbd6fbfbcf@linux.ibm.com --- arch/powerpc/include/asm/secure_boot.h | 6 ++++++ arch/powerpc/kernel/secure_boot.c | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/powerpc/include/asm/secure_boot.h b/arch/powerpc/include/asm/secure_boot.h index 07d0fe0ca81fe7..a2ff556916c66b 100644 --- a/arch/powerpc/include/asm/secure_boot.h +++ b/arch/powerpc/include/asm/secure_boot.h @@ -11,6 +11,7 @@ #ifdef CONFIG_PPC_SECURE_BOOT bool is_ppc_secureboot_enabled(void); +bool is_ppc_trustedboot_enabled(void); #else @@ -19,5 +20,10 @@ static inline bool is_ppc_secureboot_enabled(void) return false; } +static inline bool is_ppc_trustedboot_enabled(void) +{ + return false; +} + #endif #endif diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c index 583c2c4edaf03d..4b982324d36819 100644 --- a/arch/powerpc/kernel/secure_boot.c +++ b/arch/powerpc/kernel/secure_boot.c @@ -33,3 +33,18 @@ bool is_ppc_secureboot_enabled(void) return enabled; } + +bool is_ppc_trustedboot_enabled(void) +{ + struct device_node *node; + bool enabled = false; + + node = get_ppc_fw_sb_node(); + enabled = of_property_read_bool(node, "trusted-enabled"); + + of_node_put(node); + + pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled"); + + return enabled; +} From 1917855f4e0658c313e280671ad87774dbfb7b24 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Wed, 30 Oct 2019 23:31:29 -0400 Subject: [PATCH 044/144] powerpc/ima: Define trusted boot policy This patch defines an arch-specific trusted boot only policy and a combined secure and trusted boot policy. 
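Together with the existing secure boot rules, the selection made by
arch_get_ima_policy() in the diff below can be summarised as:

	secure boot	trusted boot	policy returned
	enabled		enabled		secure_and_trusted_rules
	enabled		disabled	secure_rules
	disabled	enabled		trusted_rules
	disabled	disabled	NULL (no arch-specific policy)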
Signed-off-by: Nayna Jain Signed-off-by: Mimi Zohar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1572492694-6520-5-git-send-email-zohar@linux.ibm.com --- arch/powerpc/kernel/ima_arch.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c index d88913dc0da7d9..0ef5956c9753e8 100644 --- a/arch/powerpc/kernel/ima_arch.c +++ b/arch/powerpc/kernel/ima_arch.c @@ -30,6 +30,32 @@ static const char *const secure_rules[] = { NULL }; +/* + * The "trusted_rules" are enabled only on "trustedboot" enabled systems. + * These rules add the kexec kernel image and kernel modules file hashes to + * the IMA measurement list. + */ +static const char *const trusted_rules[] = { + "measure func=KEXEC_KERNEL_CHECK", + "measure func=MODULE_CHECK", + NULL +}; + +/* + * The "secure_and_trusted_rules" contains rules for both the secure boot and + * trusted boot. The "template=ima-modsig" option includes the appended + * signature, when available, in the IMA measurement list. + */ +static const char *const secure_and_trusted_rules[] = { + "measure func=KEXEC_KERNEL_CHECK template=ima-modsig", + "measure func=MODULE_CHECK template=ima-modsig", + "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig", +#ifndef CONFIG_MODULE_SIG_FORCE + "appraise func=MODULE_CHECK appraise_type=imasig|modsig", +#endif + NULL +}; + /* * Returns the relevant IMA arch-specific policies based on the system secure * boot state. @@ -37,7 +63,12 @@ static const char *const secure_rules[] = { const char *const *arch_get_ima_policy(void) { if (is_ppc_secureboot_enabled()) - return secure_rules; + if (is_ppc_trustedboot_enabled()) + return secure_and_trusted_rules; + else + return secure_rules; + else if (is_ppc_trustedboot_enabled()) + return trusted_rules; return NULL; } From e14555e3d0e9edfad0a6840c0152f71aba97e793 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Wed, 30 Oct 2019 23:31:30 -0400 Subject: [PATCH 045/144] ima: Make process_buffer_measurement() generic process_buffer_measurement() is limited to measuring the kexec boot command line. This patch makes process_buffer_measurement() more generic, allowing it to measure other types of buffer data (e.g. blacklisted binary hashes or key hashes). process_buffer_measurement() may be called directly from an IMA hook or as an auxiliary measurement record. In both cases the buffer measurement is based on policy. This patch modifies the function to conditionally retrieve the policy defined PCR and template for the IMA hook case. 
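For example, an auxiliary measurement record is requested with a NONE hook
and an explicit PCR, as the blacklist patch later in this series does:

	/*
	 * func == NONE skips the policy lookup; pcr == 0 falls back to
	 * CONFIG_IMA_MEASURE_PCR_IDX, and the default "ima-buf" template
	 * is used when the rule does not name one.
	 */
	process_buffer_measurement(digest, digestsize, "blacklisted-hash",
				   NONE, pcr);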
Signed-off-by: Nayna Jain [zohar@linux.ibm.com: added comment in process_buffer_measurement()] Signed-off-by: Mimi Zohar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1572492694-6520-6-git-send-email-zohar@linux.ibm.com --- security/integrity/ima/ima.h | 3 ++ security/integrity/ima/ima_main.c | 58 +++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 3689081aaf3804..a65772ffa427ee 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -217,6 +217,9 @@ void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file, struct evm_ima_xattr_data *xattr_value, int xattr_len, const struct modsig *modsig, int pcr, struct ima_template_desc *template_desc); +void process_buffer_measurement(const void *buf, int size, + const char *eventname, enum ima_hooks func, + int pcr); void ima_audit_measurement(struct integrity_iint_cache *iint, const unsigned char *filename); int ima_alloc_init_template(struct ima_event_data *event_data, diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 60027c643ecdf7..a26e3ad4e886cc 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -626,14 +626,14 @@ int ima_load_data(enum kernel_load_data_id id) * @buf: pointer to the buffer that needs to be added to the log. * @size: size of buffer(in bytes). * @eventname: event name to be used for the buffer entry. - * @cred: a pointer to a credentials structure for user validation. - * @secid: the secid of the task to be validated. + * @func: IMA hook + * @pcr: pcr to extend the measurement * * Based on policy, the buffer is measured into the ima log. */ -static void process_buffer_measurement(const void *buf, int size, - const char *eventname, - const struct cred *cred, u32 secid) +void process_buffer_measurement(const void *buf, int size, + const char *eventname, enum ima_hooks func, + int pcr) { int ret = 0; struct ima_template_entry *entry = NULL; @@ -642,19 +642,45 @@ static void process_buffer_measurement(const void *buf, int size, .filename = eventname, .buf = buf, .buf_len = size}; - struct ima_template_desc *template_desc = NULL; + struct ima_template_desc *template = NULL; struct { struct ima_digest_data hdr; char digest[IMA_MAX_DIGEST_SIZE]; } hash = {}; int violation = 0; - int pcr = CONFIG_IMA_MEASURE_PCR_IDX; int action = 0; + u32 secid; - action = ima_get_action(NULL, cred, secid, 0, KEXEC_CMDLINE, &pcr, - &template_desc); - if (!(action & IMA_MEASURE)) - return; + /* + * Both LSM hooks and auxilary based buffer measurements are + * based on policy. To avoid code duplication, differentiate + * between the LSM hooks and auxilary buffer measurements, + * retrieving the policy rule information only for the LSM hook + * buffer measurements. + */ + if (func) { + security_task_getsecid(current, &secid); + action = ima_get_action(NULL, current_cred(), secid, 0, func, + &pcr, &template); + if (!(action & IMA_MEASURE)) + return; + } + + if (!pcr) + pcr = CONFIG_IMA_MEASURE_PCR_IDX; + + if (!template) { + template = lookup_template_desc("ima-buf"); + ret = template_desc_init_fields(template->fmt, + &(template->fields), + &(template->num_fields)); + if (ret < 0) { + pr_err("template %s init failed, result: %d\n", + (strlen(template->name) ? 
+ template->name : template->fmt), ret); + return; + } + } iint.ima_hash = &hash.hdr; iint.ima_hash->algo = ima_hash_algo; @@ -664,7 +690,7 @@ static void process_buffer_measurement(const void *buf, int size, if (ret < 0) goto out; - ret = ima_alloc_init_template(&event_data, &entry, template_desc); + ret = ima_alloc_init_template(&event_data, &entry, template); if (ret < 0) goto out; @@ -686,13 +712,9 @@ static void process_buffer_measurement(const void *buf, int size, */ void ima_kexec_cmdline(const void *buf, int size) { - u32 secid; - - if (buf && size != 0) { - security_task_getsecid(current, &secid); + if (buf && size != 0) process_buffer_measurement(buf, size, "kexec-cmdline", - current_cred(), secid); - } + KEXEC_CMDLINE, 0); } static int __init init_ima(void) From 2434f7d2d488c3301ae81f1031e1c66c6f076fb7 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Wed, 30 Oct 2019 23:31:31 -0400 Subject: [PATCH 046/144] certs: Add wrapper function to check blacklisted binary hash The -EKEYREJECTED error returned by existing is_hash_blacklisted() is misleading when called for checking against blacklisted hash of a binary. This patch adds a wrapper function is_binary_blacklisted() to return -EPERM error if binary is blacklisted. Signed-off-by: Nayna Jain Reviewed-by: Mimi Zohar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1572492694-6520-7-git-send-email-zohar@linux.ibm.com --- certs/blacklist.c | 9 +++++++++ include/keys/system_keyring.h | 6 ++++++ 2 files changed, 15 insertions(+) diff --git a/certs/blacklist.c b/certs/blacklist.c index ec00bf337eb673..6514f9ebc943f4 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -135,6 +135,15 @@ int is_hash_blacklisted(const u8 *hash, size_t hash_len, const char *type) } EXPORT_SYMBOL_GPL(is_hash_blacklisted); +int is_binary_blacklisted(const u8 *hash, size_t hash_len) +{ + if (is_hash_blacklisted(hash, hash_len, "bin") == -EKEYREJECTED) + return -EPERM; + + return 0; +} +EXPORT_SYMBOL_GPL(is_binary_blacklisted); + /* * Initialise the blacklist */ diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index c1a96fdf598bc2..fb8b07daa9d155 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -35,12 +35,18 @@ extern int restrict_link_by_builtin_and_secondary_trusted( extern int mark_hash_blacklisted(const char *hash); extern int is_hash_blacklisted(const u8 *hash, size_t hash_len, const char *type); +extern int is_binary_blacklisted(const u8 *hash, size_t hash_len); #else static inline int is_hash_blacklisted(const u8 *hash, size_t hash_len, const char *type) { return 0; } + +static inline int is_binary_blacklisted(const u8 *hash, size_t hash_len) +{ + return 0; +} #endif #ifdef CONFIG_IMA_BLACKLIST_KEYRING From 273df864cf7466fb170b8dcc1abd672cd08ad8d3 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Wed, 30 Oct 2019 23:31:32 -0400 Subject: [PATCH 047/144] ima: Check against blacklisted hashes for files with modsig Asymmetric private keys are used to sign multiple files. The kernel currently supports checking against blacklisted keys. However, if the public key is blacklisted, any file signed by the blacklisted key will automatically fail signature verification. Blacklisting the public key is not fine enough granularity, as we might want to only blacklist a particular file. This patch adds support for checking against the blacklisted hash of the file, without the appended signature, based on the IMA policy. It defines a new policy option "appraise_flag=check_blacklist". 
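In outline, appraisal then short-circuits on a blacklisted hash before any
signature verification is attempted (a sketch of the process_measurement()
change in the diff below):

	rc = ima_check_blacklist(iint, modsig, pcr);
	if (rc != -EPERM) {
		inode_lock(inode);
		rc = ima_appraise_measurement(func, iint, file, pathname,
					      xattr_value, xattr_len, modsig);
		inode_unlock(inode);
	}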
In addition to the blacklisted binary hashes stored in the firmware "dbx" variable, the Linux kernel may be configured to load blacklisted binary hashes onto the .blacklist keyring as well. The following example shows how to blacklist a specific kernel module hash. $ sha256sum kernel/kheaders.ko 77fa889b35a05338ec52e51591c1b89d4c8d1c99a21251d7c22b1a8642a6bad3 kernel/kheaders.ko $ grep BLACKLIST .config CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_SYSTEM_BLACKLIST_HASH_LIST="blacklist-hash-list" $ cat certs/blacklist-hash-list "bin:77fa889b35a05338ec52e51591c1b89d4c8d1c99a21251d7c22b1a8642a6bad3" Update the IMA custom measurement and appraisal policy rules (/etc/ima-policy): measure func=MODULE_CHECK template=ima-modsig appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig After building, installing, and rebooting the kernel: 545660333 ---lswrv 0 0 \_ blacklist: bin:77fa889b35a05338ec52e51591c1b89d4c8d1c99a21251d7c22b1a8642a6bad3 measure func=MODULE_CHECK template=ima-modsig appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig modprobe: ERROR: could not insert 'kheaders': Permission denied 10 0c9834db5a0182c1fb0cdc5d3adcf11a11fd83dd ima-sig sha256:3bc6ed4f0b4d6e31bc1dbc9ef844605abc7afdc6d81a57d77a1ec9407997c40 2 /usr/lib/modules/5.4.0-rc3+/kernel/kernel/kheaders.ko 10 82aad2bcc3fa8ed94762356b5c14838f3bcfa6a0 ima-modsig sha256:3bc6ed4f0b4d6e31bc1dbc9ef844605abc7afdc6d81a57d77a1ec9407997c40 2 /usr/lib/modules/5.4.0rc3+/kernel/kernel/kheaders.ko sha256:77fa889b3 5a05338ec52e51591c1b89d4c8d1c99a21251d7c22b1a8642a6bad3 3082029a06092a864886f70d010702a082028b30820287020101310d300b0609608648 016503040201300b06092a864886f70d01070131820264.... 10 25b72217cc1152b44b134ce2cd68f12dfb71acb3 ima-buf sha256:8b58427fedcf8f4b20bc8dc007f2e232bf7285d7b93a66476321f9c2a3aa132 b blacklisted-hash 77fa889b35a05338ec52e51591c1b89d4c8d1c99a21251d7c22b1a8642a6bad3 Signed-off-by: Nayna Jain [zohar@linux.ibm.com: updated patch description] Signed-off-by: Mimi Zohar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1572492694-6520-8-git-send-email-zohar@linux.ibm.com --- Documentation/ABI/testing/ima_policy | 4 ++++ security/integrity/ima/ima.h | 8 +++++++ security/integrity/ima/ima_appraise.c | 33 +++++++++++++++++++++++++++ security/integrity/ima/ima_main.c | 12 ++++++---- security/integrity/ima/ima_policy.c | 12 ++++++++-- security/integrity/integrity.h | 1 + 6 files changed, 64 insertions(+), 6 deletions(-) diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy index 29ebe9afdac42c..29aaedf332461c 100644 --- a/Documentation/ABI/testing/ima_policy +++ b/Documentation/ABI/testing/ima_policy @@ -25,6 +25,7 @@ Description: lsm: [[subj_user=] [subj_role=] [subj_type=] [obj_user=] [obj_role=] [obj_type=]] option: [[appraise_type=]] [template=] [permit_directio] + [appraise_flag=] base: func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK] [FIRMWARE_CHECK] [KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK] @@ -38,6 +39,9 @@ Description: fowner:= decimal value lsm: are LSM specific option: appraise_type:= [imasig] [imasig|modsig] + appraise_flag:= [check_blacklist] + Currently, blacklist check is only for files signed with appended + signature. template:= name of a defined IMA template type (eg, ima-ng). Only valid when action is "measure". 
pcr:= decimal value diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index a65772ffa427ee..df4ca482fb53c4 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -256,6 +256,8 @@ int ima_policy_show(struct seq_file *m, void *v); #define IMA_APPRAISE_KEXEC 0x40 #ifdef CONFIG_IMA_APPRAISE +int ima_check_blacklist(struct integrity_iint_cache *iint, + const struct modsig *modsig, int pcr); int ima_appraise_measurement(enum ima_hooks func, struct integrity_iint_cache *iint, struct file *file, const unsigned char *filename, @@ -271,6 +273,12 @@ int ima_read_xattr(struct dentry *dentry, struct evm_ima_xattr_data **xattr_value); #else +static inline int ima_check_blacklist(struct integrity_iint_cache *iint, + const struct modsig *modsig, int pcr) +{ + return 0; +} + static inline int ima_appraise_measurement(enum ima_hooks func, struct integrity_iint_cache *iint, struct file *file, diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 136ae4e0ee9212..300c8d2943c573 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "ima.h" @@ -303,6 +304,38 @@ static int modsig_verify(enum ima_hooks func, const struct modsig *modsig, return rc; } +/* + * ima_check_blacklist - determine if the binary is blacklisted. + * + * Add the hash of the blacklisted binary to the measurement list, based + * on policy. + * + * Returns -EPERM if the hash is blacklisted. + */ +int ima_check_blacklist(struct integrity_iint_cache *iint, + const struct modsig *modsig, int pcr) +{ + enum hash_algo hash_algo; + const u8 *digest = NULL; + u32 digestsize = 0; + int rc = 0; + + if (!(iint->flags & IMA_CHECK_BLACKLIST)) + return 0; + + if (iint->flags & IMA_MODSIG_ALLOWED && modsig) { + ima_get_modsig_digest(modsig, &hash_algo, &digest, &digestsize); + + rc = is_binary_blacklisted(digest, digestsize); + if ((rc == -EPERM) && (iint->flags & IMA_MEASURE)) + process_buffer_measurement(digest, digestsize, + "blacklisted-hash", NONE, + pcr); + } + + return rc; +} + /* * ima_appraise_measurement - appraise file measurement * diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index a26e3ad4e886cc..d7e987baf12741 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -335,10 +335,14 @@ static int process_measurement(struct file *file, const struct cred *cred, xattr_value, xattr_len, modsig, pcr, template_desc); if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) { - inode_lock(inode); - rc = ima_appraise_measurement(func, iint, file, pathname, - xattr_value, xattr_len, modsig); - inode_unlock(inode); + rc = ima_check_blacklist(iint, modsig, pcr); + if (rc != -EPERM) { + inode_lock(inode); + rc = ima_appraise_measurement(func, iint, file, + pathname, xattr_value, + xattr_len, modsig); + inode_unlock(inode); + } if (!rc) rc = mmap_violation_check(func, file, &pathbuf, &pathname, filename); diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 5380aca2b35181..f19a895ad7cdf6 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -765,8 +765,8 @@ enum { Opt_fsuuid, Opt_uid_eq, Opt_euid_eq, Opt_fowner_eq, Opt_uid_gt, Opt_euid_gt, Opt_fowner_gt, Opt_uid_lt, Opt_euid_lt, Opt_fowner_lt, - Opt_appraise_type, Opt_permit_directio, - Opt_pcr, Opt_template, Opt_err + Opt_appraise_type, Opt_appraise_flag, + 
Opt_permit_directio, Opt_pcr, Opt_template, Opt_err }; static const match_table_t policy_tokens = { @@ -798,6 +798,7 @@ static const match_table_t policy_tokens = { {Opt_euid_lt, "euid<%s"}, {Opt_fowner_lt, "fowner<%s"}, {Opt_appraise_type, "appraise_type=%s"}, + {Opt_appraise_flag, "appraise_flag=%s"}, {Opt_permit_directio, "permit_directio"}, {Opt_pcr, "pcr=%s"}, {Opt_template, "template=%s"}, @@ -1172,6 +1173,11 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) else result = -EINVAL; break; + case Opt_appraise_flag: + ima_log_string(ab, "appraise_flag", args[0].from); + if (strstr(args[0].from, "blacklist")) + entry->flags |= IMA_CHECK_BLACKLIST; + break; case Opt_permit_directio: entry->flags |= IMA_PERMIT_DIRECTIO; break; @@ -1500,6 +1506,8 @@ int ima_policy_show(struct seq_file *m, void *v) else seq_puts(m, "appraise_type=imasig "); } + if (entry->flags & IMA_CHECK_BLACKLIST) + seq_puts(m, "appraise_flag=check_blacklist "); if (entry->flags & IMA_PERMIT_DIRECTIO) seq_puts(m, "permit_directio "); rcu_read_unlock(); diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index d9323d31a3a83c..73fc286834d7bc 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -32,6 +32,7 @@ #define EVM_IMMUTABLE_DIGSIG 0x08000000 #define IMA_FAIL_UNVERIFIABLE_SIGS 0x10000000 #define IMA_MODSIG_ALLOWED 0x20000000 +#define IMA_CHECK_BLACKLIST 0x40000000 #define IMA_DO_MASK (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \ IMA_HASH | IMA_APPRAISE_SUBMASK) From dc87f18615db9dc74a75cfb4a57ed33b07a3903a Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Wed, 30 Oct 2019 23:31:33 -0400 Subject: [PATCH 048/144] powerpc/ima: Update ima arch policy to check for blacklist This patch updates the arch-specific policies for PowerNV system to make sure that the binary hash is not blacklisted. Signed-off-by: Nayna Jain Signed-off-by: Mimi Zohar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1572492694-6520-9-git-send-email-zohar@linux.ibm.com --- arch/powerpc/kernel/ima_arch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c index 0ef5956c9753e8..b9de0fb45bb925 100644 --- a/arch/powerpc/kernel/ima_arch.c +++ b/arch/powerpc/kernel/ima_arch.c @@ -23,9 +23,9 @@ bool arch_ima_get_secureboot(void) * is not enabled. 
*/ static const char *const secure_rules[] = { - "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig", + "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", #ifndef CONFIG_MODULE_SIG_FORCE - "appraise func=MODULE_CHECK appraise_type=imasig|modsig", + "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", #endif NULL }; @@ -49,9 +49,9 @@ static const char *const trusted_rules[] = { static const char *const secure_and_trusted_rules[] = { "measure func=KEXEC_KERNEL_CHECK template=ima-modsig", "measure func=MODULE_CHECK template=ima-modsig", - "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig", + "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", #ifndef CONFIG_MODULE_SIG_FORCE - "appraise func=MODULE_CHECK appraise_type=imasig|modsig", + "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", #endif NULL }; From d72ea4915c7e6fa5e7b9022a34df66e375bfe46c Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 30 Oct 2019 23:31:34 -0400 Subject: [PATCH 049/144] powerpc/ima: Indicate kernel modules appended signatures are enforced The arch specific kernel module policy rule requires kernel modules to be signed, either as an IMA signature, stored as an xattr, or as an appended signature. As a result, kernel modules appended signatures could be enforced without "sig_enforce" being set or reflected in /sys/module/module/parameters/sig_enforce. This patch sets "sig_enforce". Signed-off-by: Mimi Zohar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1572492694-6520-10-git-send-email-zohar@linux.ibm.com --- arch/powerpc/kernel/ima_arch.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c index b9de0fb45bb925..e34116255ced81 100644 --- a/arch/powerpc/kernel/ima_arch.c +++ b/arch/powerpc/kernel/ima_arch.c @@ -62,13 +62,17 @@ static const char *const secure_and_trusted_rules[] = { */ const char *const *arch_get_ima_policy(void) { - if (is_ppc_secureboot_enabled()) + if (is_ppc_secureboot_enabled()) { + if (IS_ENABLED(CONFIG_MODULE_SIG)) + set_module_sig_enforced(); + if (is_ppc_trustedboot_enabled()) return secure_and_trusted_rules; else return secure_rules; - else if (is_ppc_trustedboot_enabled()) + } else if (is_ppc_trustedboot_enabled()) { return trusted_rules; + } return NULL; } From 39a963b457b5c6cbbdc70441c9d496e39d151582 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Tue, 1 Oct 2019 19:37:18 -0400 Subject: [PATCH 050/144] sysfs: Fixes __BIN_ATTR_WO() macro This patch fixes the size and write parameter for the macro __BIN_ATTR_WO(). 
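With the corrected macro, a write-only binary attribute supplies a matching
_name##_write() handler and an explicit size, which is exactly what the
secvar sysfs patch later in this series relies on:

	static ssize_t update_write(struct file *filep, struct kobject *kobj,
				    struct bin_attribute *attr, char *buf,
				    loff_t off, size_t count);

	static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0);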
Fixes: 7f905761e15a8 ("sysfs: add BIN_ATTR_WO() macro") Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1569973038-2710-1-git-send-email-nayna@linux.ibm.com --- include/linux/sysfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 5420817ed317eb..fa7ee503fb7632 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -196,9 +196,9 @@ struct bin_attribute { .size = _size, \ } -#define __BIN_ATTR_WO(_name) { \ +#define __BIN_ATTR_WO(_name, _size) { \ .attr = { .name = __stringify(_name), .mode = 0200 }, \ - .store = _name##_store, \ + .write = _name##_write, \ .size = _size, \ } From 9155e2341aa8b5df057dc1c77633b33d1a4f17d2 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 10 Nov 2019 21:10:33 -0600 Subject: [PATCH 051/144] powerpc/powernv: Add OPAL API interface to access secure variable The X.509 certificates trusted by the platform and required to secure boot the OS kernel are wrapped in secure variables, which are controlled by OPAL. This patch adds firmware/kernel interface to read and write OPAL secure variables based on the unique key. This support can be enabled using CONFIG_OPAL_SECVAR. Signed-off-by: Claudio Carvalho Signed-off-by: Nayna Jain Signed-off-by: Eric Richter [mpe: Make secvar_ops __ro_after_init, only build opal-secvar.c if PPC_SECURE_BOOT=y] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1573441836-3632-2-git-send-email-nayna@linux.ibm.com --- arch/powerpc/include/asm/opal-api.h | 5 +- arch/powerpc/include/asm/opal.h | 7 + arch/powerpc/include/asm/secvar.h | 35 +++++ arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/secvar-ops.c | 17 +++ arch/powerpc/platforms/powernv/Makefile | 1 + arch/powerpc/platforms/powernv/opal-call.c | 3 + arch/powerpc/platforms/powernv/opal-secvar.c | 140 +++++++++++++++++++ arch/powerpc/platforms/powernv/opal.c | 3 + 9 files changed, 211 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/include/asm/secvar.h create mode 100644 arch/powerpc/kernel/secvar-ops.c create mode 100644 arch/powerpc/platforms/powernv/opal-secvar.c diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 378e3997845ab7..c1f25a760eb13b 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -211,7 +211,10 @@ #define OPAL_MPIPL_UPDATE 173 #define OPAL_MPIPL_REGISTER_TAG 174 #define OPAL_MPIPL_QUERY_TAG 175 -#define OPAL_LAST 175 +#define OPAL_SECVAR_GET 176 +#define OPAL_SECVAR_GET_NEXT 177 +#define OPAL_SECVAR_ENQUEUE_UPDATE 178 +#define OPAL_LAST 178 #define QUIESCE_HOLD 1 /* Spin all calls at entry */ #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index a0cf8fba4d12bd..9986ac34b8e224 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -298,6 +298,13 @@ int opal_sensor_group_clear(u32 group_hndl, int token); int opal_sensor_group_enable(u32 group_hndl, int token, bool enable); int opal_nx_coproc_init(uint32_t chip_id, uint32_t ct); +int opal_secvar_get(const char *key, uint64_t key_len, u8 *data, + uint64_t *data_size); +int opal_secvar_get_next(const char *key, uint64_t *key_len, + uint64_t key_buf_size); +int opal_secvar_enqueue_update(const char *key, uint64_t key_len, u8 *data, + uint64_t data_size); + s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size); s64 opal_mpipl_register_tag(enum 
opal_mpipl_tags tag, u64 addr); s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, u64 *addr); diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h new file mode 100644 index 00000000000000..4cc35b58b98627 --- /dev/null +++ b/arch/powerpc/include/asm/secvar.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + * + * PowerPC secure variable operations. + */ +#ifndef SECVAR_OPS_H +#define SECVAR_OPS_H + +#include +#include + +extern const struct secvar_operations *secvar_ops; + +struct secvar_operations { + int (*get)(const char *key, uint64_t key_len, u8 *data, + uint64_t *data_size); + int (*get_next)(const char *key, uint64_t *key_len, + uint64_t keybufsize); + int (*set)(const char *key, uint64_t key_len, u8 *data, + uint64_t data_size); +}; + +#ifdef CONFIG_PPC_SECURE_BOOT + +extern void set_secvar_ops(const struct secvar_operations *ops); + +#else + +static inline void set_secvar_ops(const struct secvar_operations *ops) { } + +#endif + +#endif diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index e8eb2955b7d5ed..3cf26427334f55 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -161,7 +161,7 @@ ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),) obj-y += ucall.o endif -obj-$(CONFIG_PPC_SECURE_BOOT) += secure_boot.o ima_arch.o +obj-$(CONFIG_PPC_SECURE_BOOT) += secure_boot.o ima_arch.o secvar-ops.o # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_prom_init.o := n diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c new file mode 100644 index 00000000000000..6a29777d6a2ddb --- /dev/null +++ b/arch/powerpc/kernel/secvar-ops.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + * + * This file initializes secvar operations for PowerPC Secureboot + */ + +#include +#include + +const struct secvar_operations *secvar_ops __ro_after_init; + +void set_secvar_ops(const struct secvar_operations *ops) +{ + secvar_ops = ops; +} diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index a3ac9646119d0b..c0f8120045c3ae 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -20,3 +20,4 @@ obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o obj-$(CONFIG_OCXL_BASE) += ocxl.o obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o +obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index a2aa5e433ac84d..5cd0f52d258f64 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -290,3 +290,6 @@ OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT); OPAL_CALL(opal_mpipl_update, OPAL_MPIPL_UPDATE); OPAL_CALL(opal_mpipl_register_tag, OPAL_MPIPL_REGISTER_TAG); OPAL_CALL(opal_mpipl_query_tag, OPAL_MPIPL_QUERY_TAG); +OPAL_CALL(opal_secvar_get, OPAL_SECVAR_GET); +OPAL_CALL(opal_secvar_get_next, OPAL_SECVAR_GET_NEXT); +OPAL_CALL(opal_secvar_enqueue_update, OPAL_SECVAR_ENQUEUE_UPDATE); diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c new file mode 100644 index 00000000000000..14133e120bdd26 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-secvar.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PowerNV code for 
secure variables + * + * Copyright (C) 2019 IBM Corporation + * Author: Claudio Carvalho + * Nayna Jain + * + * APIs to access secure variables managed by OPAL. + */ + +#define pr_fmt(fmt) "secvar: "fmt + +#include +#include +#include +#include +#include +#include + +static int opal_status_to_err(int rc) +{ + int err; + + switch (rc) { + case OPAL_SUCCESS: + err = 0; + break; + case OPAL_UNSUPPORTED: + err = -ENXIO; + break; + case OPAL_PARAMETER: + err = -EINVAL; + break; + case OPAL_RESOURCE: + err = -ENOSPC; + break; + case OPAL_HARDWARE: + err = -EIO; + break; + case OPAL_NO_MEM: + err = -ENOMEM; + break; + case OPAL_EMPTY: + err = -ENOENT; + break; + case OPAL_PARTIAL: + err = -EFBIG; + break; + default: + err = -EINVAL; + } + + return err; +} + +static int opal_get_variable(const char *key, uint64_t ksize, + u8 *data, uint64_t *dsize) +{ + int rc; + + if (!key || !dsize) + return -EINVAL; + + *dsize = cpu_to_be64(*dsize); + + rc = opal_secvar_get(key, ksize, data, dsize); + + *dsize = be64_to_cpu(*dsize); + + return opal_status_to_err(rc); +} + +static int opal_get_next_variable(const char *key, uint64_t *keylen, + uint64_t keybufsize) +{ + int rc; + + if (!key || !keylen) + return -EINVAL; + + *keylen = cpu_to_be64(*keylen); + + rc = opal_secvar_get_next(key, keylen, keybufsize); + + *keylen = be64_to_cpu(*keylen); + + return opal_status_to_err(rc); +} + +static int opal_set_variable(const char *key, uint64_t ksize, u8 *data, + uint64_t dsize) +{ + int rc; + + if (!key || !data) + return -EINVAL; + + rc = opal_secvar_enqueue_update(key, ksize, data, dsize); + + return opal_status_to_err(rc); +} + +static const struct secvar_operations opal_secvar_ops = { + .get = opal_get_variable, + .get_next = opal_get_next_variable, + .set = opal_set_variable, +}; + +static int opal_secvar_probe(struct platform_device *pdev) +{ + if (!opal_check_token(OPAL_SECVAR_GET) + || !opal_check_token(OPAL_SECVAR_GET_NEXT) + || !opal_check_token(OPAL_SECVAR_ENQUEUE_UPDATE)) { + pr_err("OPAL doesn't support secure variables\n"); + return -ENODEV; + } + + set_secvar_ops(&opal_secvar_ops); + + return 0; +} + +static const struct of_device_id opal_secvar_match[] = { + { .compatible = "ibm,secvar-backend",}, + {}, +}; + +static struct platform_driver opal_secvar_driver = { + .driver = { + .name = "secvar", + .of_match_table = opal_secvar_match, + }, +}; + +static int __init opal_secvar_init(void) +{ + return platform_driver_probe(&opal_secvar_driver, opal_secvar_probe); +} +device_initcall(opal_secvar_init); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 38e90270280bf6..8355bcd00f93f4 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -1002,6 +1002,9 @@ static int __init opal_init(void) /* Initialise OPAL Power control interface */ opal_power_control_init(); + /* Initialize OPAL secure variables */ + opal_pdev_init("ibm,secvar-backend"); + return 0; } machine_subsys_initcall(powernv, opal_init); From bd5d9c743d38f67d64ea1b512a461f6b5a5f6bec Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 10 Nov 2019 21:10:34 -0600 Subject: [PATCH 052/144] powerpc: expose secure variables to userspace via sysfs PowerNV secure variables, which store the keys used for OS kernel verification, are managed by the firmware. These secure variables need to be accessed by the userspace for addition/deletion of the certificates. This patch adds the sysfs interface to expose secure variables for PowerNV secureboot. 
Users shall use this interface to manipulate the keys stored in the secure
variables.

Signed-off-by: Nayna Jain
Reviewed-by: Greg Kroah-Hartman
Signed-off-by: Eric Richter
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/1573441836-3632-3-git-send-email-nayna@linux.ibm.com
---
 Documentation/ABI/testing/sysfs-secvar |  46 +++++
 arch/powerpc/Kconfig                   |  11 ++
 arch/powerpc/kernel/Makefile           |   1 +
 arch/powerpc/kernel/secvar-sysfs.c     | 248 +++++++++++++++++++++++++
 4 files changed, 306 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-secvar
 create mode 100644 arch/powerpc/kernel/secvar-sysfs.c

diff --git a/Documentation/ABI/testing/sysfs-secvar b/Documentation/ABI/testing/sysfs-secvar
new file mode 100644
index 00000000000000..feebb8c57294ca
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-secvar
@@ -0,0 +1,46 @@
+What:		/sys/firmware/secvar
+Date:		August 2019
+Contact:	Nayna Jain
+Description:	This directory is created if the POWER firmware supports OS
+		secureboot, and thereby secure variables. It exposes an
+		interface for reading/writing the secure variables.
+
+What:		/sys/firmware/secvar/vars
+Date:		August 2019
+Contact:	Nayna Jain
+Description:	This directory lists all the secure variables that are supported
+		by the firmware.
+
+What:		/sys/firmware/secvar/format
+Date:		August 2019
+Contact:	Nayna Jain
+Description:	A string indicating which backend is in use by the firmware.
+		This determines the format of the variable and the accepted
+		format of variable updates.
+
+What:		/sys/firmware/secvar/vars/<variable_name>
+Date:		August 2019
+Contact:	Nayna Jain
+Description:	Each secure variable is represented as a directory named as
+		<variable_name>. The variable name is unique and is in ASCII
+		representation. The data and size can be determined by reading
+		their respective attribute files.
+
+What:		/sys/firmware/secvar/vars/<variable_name>/size
+Date:		August 2019
+Contact:	Nayna Jain
+Description:	An integer representation of the size of the content of the
+		variable. In other words, it represents the size of the data.
+
+What:		/sys/firmware/secvar/vars/<variable_name>/data
+Date:		August 2019
+Contact:	Nayna Jain
+Description:	A read-only file containing the value of the variable. The size
+		of the file represents the maximum size of the variable data.
+
+What:		/sys/firmware/secvar/vars/<variable_name>/update
+Date:		August 2019
+Contact:	Nayna Jain
+Description:	A write-only file that is used to submit the new value for the
+		variable. The size of the file represents the maximum size of
+		the variable data that can be written.
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c795039bdc73b6..cabc091f3fe1f0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -945,6 +945,17 @@ config PPC_SECURE_BOOT
 	  to enable OS secure boot on systems that have firmware support for
 	  it. If in doubt say N.
 
+config PPC_SECVAR_SYSFS
+	bool "Enable sysfs interface for POWER secure variables"
+	default y
+	depends on PPC_SECURE_BOOT
+	depends on SYSFS
+	help
+	  POWER secure variables are managed and controlled by firmware.
+	  These variables are exposed to userspace via sysfs to enable
+	  read/write operations on these variables. Say Y if you have
+	  secure boot enabled and want to expose variables to userspace.
+ endmenu config ISA_DMA_API diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 3cf26427334f55..b216e9f316ee1f 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -162,6 +162,7 @@ obj-y += ucall.o endif obj-$(CONFIG_PPC_SECURE_BOOT) += secure_boot.o ima_arch.o secvar-ops.o +obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_prom_init.o := n diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c new file mode 100644 index 00000000000000..a0a78aba2083e0 --- /dev/null +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2019 IBM Corporation + * + * This code exposes secure variables to user via sysfs + */ + +#define pr_fmt(fmt) "secvar-sysfs: "fmt + +#include +#include +#include +#include +#include + +#define NAME_MAX_SIZE 1024 + +static struct kobject *secvar_kobj; +static struct kset *secvar_kset; + +static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + ssize_t rc = 0; + struct device_node *node; + const char *format; + + node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); + if (!of_device_is_available(node)) + return -ENODEV; + + rc = of_property_read_string(node, "format", &format); + if (rc) + return rc; + + rc = sprintf(buf, "%s\n", format); + + of_node_put(node); + + return rc; +} + + +static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + uint64_t dsize; + int rc; + + rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); + if (rc) { + pr_err("Error retrieving %s variable size %d\n", kobj->name, + rc); + return rc; + } + + return sprintf(buf, "%llu\n", dsize); +} + +static ssize_t data_read(struct file *filep, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) +{ + uint64_t dsize; + char *data; + int rc; + + rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); + if (rc) { + pr_err("Error getting %s variable size %d\n", kobj->name, rc); + return rc; + } + pr_debug("dsize is %llu\n", dsize); + + data = kzalloc(dsize, GFP_KERNEL); + if (!data) + return -ENOMEM; + + rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, data, &dsize); + if (rc) { + pr_err("Error getting %s variable %d\n", kobj->name, rc); + goto data_fail; + } + + rc = memory_read_from_buffer(buf, count, &off, data, dsize); + +data_fail: + kfree(data); + return rc; +} + +static ssize_t update_write(struct file *filep, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) +{ + int rc; + + pr_debug("count is %ld\n", count); + rc = secvar_ops->set(kobj->name, strlen(kobj->name) + 1, buf, count); + if (rc) { + pr_err("Error setting the %s variable %d\n", kobj->name, rc); + return rc; + } + + return count; +} + +static struct kobj_attribute format_attr = __ATTR_RO(format); + +static struct kobj_attribute size_attr = __ATTR_RO(size); + +static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0); + +static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0); + +static struct bin_attribute *secvar_bin_attrs[] = { + &data_attr, + &update_attr, + NULL, +}; + +static struct attribute *secvar_attrs[] = { + &size_attr.attr, + NULL, +}; + +static const struct attribute_group secvar_attr_group = { + .attrs = secvar_attrs, + .bin_attrs = secvar_bin_attrs, +}; +__ATTRIBUTE_GROUPS(secvar_attr); + +static 
struct kobj_type secvar_ktype = { + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = secvar_attr_groups, +}; + +static int update_kobj_size(void) +{ + + struct device_node *node; + u64 varsize; + int rc = 0; + + node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); + if (!of_device_is_available(node)) { + rc = -ENODEV; + goto out; + } + + rc = of_property_read_u64(node, "max-var-size", &varsize); + if (rc) + goto out; + + data_attr.size = varsize; + update_attr.size = varsize; + +out: + of_node_put(node); + + return rc; +} + +static int secvar_sysfs_load(void) +{ + char *name; + uint64_t namesize = 0; + struct kobject *kobj; + int rc; + + name = kzalloc(NAME_MAX_SIZE, GFP_KERNEL); + if (!name) + return -ENOMEM; + + do { + rc = secvar_ops->get_next(name, &namesize, NAME_MAX_SIZE); + if (rc) { + if (rc != -ENOENT) + pr_err("error getting secvar from firmware %d\n", + rc); + break; + } + + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (!kobj) { + rc = -ENOMEM; + break; + } + + kobject_init(kobj, &secvar_ktype); + + rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name); + if (rc) { + pr_warn("kobject_add error %d for attribute: %s\n", rc, + name); + kobject_put(kobj); + kobj = NULL; + } + + if (kobj) + kobject_uevent(kobj, KOBJ_ADD); + + } while (!rc); + + kfree(name); + return rc; +} + +static int secvar_sysfs_init(void) +{ + int rc; + + if (!secvar_ops) { + pr_warn("secvar: failed to retrieve secvar operations.\n"); + return -ENODEV; + } + + secvar_kobj = kobject_create_and_add("secvar", firmware_kobj); + if (!secvar_kobj) { + pr_err("secvar: Failed to create firmware kobj\n"); + return -ENOMEM; + } + + rc = sysfs_create_file(secvar_kobj, &format_attr.attr); + if (rc) { + kobject_put(secvar_kobj); + return -ENOMEM; + } + + secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj); + if (!secvar_kset) { + pr_err("secvar: sysfs kobject registration failed.\n"); + kobject_put(secvar_kobj); + return -ENOMEM; + } + + rc = update_kobj_size(); + if (rc) { + pr_err("Cannot read the size of the attribute\n"); + return rc; + } + + secvar_sysfs_load(); + + return 0; +} + +late_initcall(secvar_sysfs_init); From ad723674d6758478829ee766e3f1a2a24d56236f Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 10 Nov 2019 21:10:35 -0600 Subject: [PATCH 053/144] x86/efi: move common keyring handler functions to new file The handlers to add the keys to the .platform keyring and blacklisted hashes to the .blacklist keyring is common for both the uefi and powerpc mechanisms of loading the keys/hashes from the firmware. 
This patch moves the common code from load_uefi.c to keyring_handler.c Signed-off-by: Nayna Jain Acked-by: Mimi Zohar Signed-off-by: Eric Richter Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1573441836-3632-4-git-send-email-nayna@linux.ibm.com --- security/integrity/Makefile | 3 +- .../platform_certs/keyring_handler.c | 80 +++++++++++++++++++ .../platform_certs/keyring_handler.h | 32 ++++++++ security/integrity/platform_certs/load_uefi.c | 67 +--------------- 4 files changed, 115 insertions(+), 67 deletions(-) create mode 100644 security/integrity/platform_certs/keyring_handler.c create mode 100644 security/integrity/platform_certs/keyring_handler.h diff --git a/security/integrity/Makefile b/security/integrity/Makefile index 35e6ca77373460..351c9662994b5f 100644 --- a/security/integrity/Makefile +++ b/security/integrity/Makefile @@ -11,7 +11,8 @@ integrity-$(CONFIG_INTEGRITY_SIGNATURE) += digsig.o integrity-$(CONFIG_INTEGRITY_ASYMMETRIC_KEYS) += digsig_asymmetric.o integrity-$(CONFIG_INTEGRITY_PLATFORM_KEYRING) += platform_certs/platform_keyring.o integrity-$(CONFIG_LOAD_UEFI_KEYS) += platform_certs/efi_parser.o \ - platform_certs/load_uefi.o + platform_certs/load_uefi.o \ + platform_certs/keyring_handler.o integrity-$(CONFIG_LOAD_IPL_KEYS) += platform_certs/load_ipl_s390.o obj-$(CONFIG_IMA) += ima/ diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c new file mode 100644 index 00000000000000..c5ba695c10e3a5 --- /dev/null +++ b/security/integrity/platform_certs/keyring_handler.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../integrity.h" + +static efi_guid_t efi_cert_x509_guid __initdata = EFI_CERT_X509_GUID; +static efi_guid_t efi_cert_x509_sha256_guid __initdata = + EFI_CERT_X509_SHA256_GUID; +static efi_guid_t efi_cert_sha256_guid __initdata = EFI_CERT_SHA256_GUID; + +/* + * Blacklist a hash. + */ +static __init void uefi_blacklist_hash(const char *source, const void *data, + size_t len, const char *type, + size_t type_len) +{ + char *hash, *p; + + hash = kmalloc(type_len + len * 2 + 1, GFP_KERNEL); + if (!hash) + return; + p = memcpy(hash, type, type_len); + p += type_len; + bin2hex(p, data, len); + p += len * 2; + *p = 0; + + mark_hash_blacklisted(hash); + kfree(hash); +} + +/* + * Blacklist an X509 TBS hash. + */ +static __init void uefi_blacklist_x509_tbs(const char *source, + const void *data, size_t len) +{ + uefi_blacklist_hash(source, data, len, "tbs:", 4); +} + +/* + * Blacklist the hash of an executable. + */ +static __init void uefi_blacklist_binary(const char *source, + const void *data, size_t len) +{ + uefi_blacklist_hash(source, data, len, "bin:", 4); +} + +/* + * Return the appropriate handler for particular signature list types found in + * the UEFI db and MokListRT tables. + */ +__init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type) +{ + if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) + return add_to_platform_keyring; + return 0; +} + +/* + * Return the appropriate handler for particular signature list types found in + * the UEFI dbx and MokListXRT tables. 
+ */ +__init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type) +{ + if (efi_guidcmp(*sig_type, efi_cert_x509_sha256_guid) == 0) + return uefi_blacklist_x509_tbs; + if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0) + return uefi_blacklist_binary; + return 0; +} diff --git a/security/integrity/platform_certs/keyring_handler.h b/security/integrity/platform_certs/keyring_handler.h new file mode 100644 index 00000000000000..2462bfa08fe341 --- /dev/null +++ b/security/integrity/platform_certs/keyring_handler.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef PLATFORM_CERTS_INTERNAL_H +#define PLATFORM_CERTS_INTERNAL_H + +#include + +void blacklist_hash(const char *source, const void *data, + size_t len, const char *type, + size_t type_len); + +/* + * Blacklist an X509 TBS hash. + */ +void blacklist_x509_tbs(const char *source, const void *data, size_t len); + +/* + * Blacklist the hash of an executable. + */ +void blacklist_binary(const char *source, const void *data, size_t len); + +/* + * Return the handler for particular signature list types found in the db. + */ +efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type); + +/* + * Return the handler for particular signature list types found in the dbx. + */ +efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type); + +#endif diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index 81b19c52832ba7..4369204a19cd96 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -9,6 +9,7 @@ #include #include #include "../integrity.h" +#include "keyring_handler.h" static efi_guid_t efi_cert_x509_guid __initdata = EFI_CERT_X509_GUID; static efi_guid_t efi_cert_x509_sha256_guid __initdata = @@ -67,72 +68,6 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, return db; } -/* - * Blacklist a hash. - */ -static __init void uefi_blacklist_hash(const char *source, const void *data, - size_t len, const char *type, - size_t type_len) -{ - char *hash, *p; - - hash = kmalloc(type_len + len * 2 + 1, GFP_KERNEL); - if (!hash) - return; - p = memcpy(hash, type, type_len); - p += type_len; - bin2hex(p, data, len); - p += len * 2; - *p = 0; - - mark_hash_blacklisted(hash); - kfree(hash); -} - -/* - * Blacklist an X509 TBS hash. - */ -static __init void uefi_blacklist_x509_tbs(const char *source, - const void *data, size_t len) -{ - uefi_blacklist_hash(source, data, len, "tbs:", 4); -} - -/* - * Blacklist the hash of an executable. - */ -static __init void uefi_blacklist_binary(const char *source, - const void *data, size_t len) -{ - uefi_blacklist_hash(source, data, len, "bin:", 4); -} - -/* - * Return the appropriate handler for particular signature list types found in - * the UEFI db and MokListRT tables. - */ -static __init efi_element_handler_t get_handler_for_db(const efi_guid_t * - sig_type) -{ - if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) - return add_to_platform_keyring; - return 0; -} - -/* - * Return the appropriate handler for particular signature list types found in - * the UEFI dbx and MokListXRT tables. 
- */ -static __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t * - sig_type) -{ - if (efi_guidcmp(*sig_type, efi_cert_x509_sha256_guid) == 0) - return uefi_blacklist_x509_tbs; - if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0) - return uefi_blacklist_binary; - return 0; -} - /* * Load the certs contained in the UEFI databases into the platform trusted * keyring and the UEFI blacklisted X.509 cert SHA256 hashes into the blacklist From 8220e22d11a05049aab9693839ab82e5e177ccde Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 10 Nov 2019 21:10:36 -0600 Subject: [PATCH 054/144] powerpc: Load firmware trusted keys/hashes into kernel keyring The keys used to verify the Host OS kernel are managed by firmware as secure variables. This patch loads the verification keys into the .platform keyring and revocation hashes into .blacklist keyring. This enables verification and loading of the kernels signed by the boot time keys which are trusted by firmware. Signed-off-by: Nayna Jain Reviewed-by: Mimi Zohar Signed-off-by: Eric Richter [mpe: Search by compatible in load_powerpc_certs(), not using format] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1573441836-3632-5-git-send-email-nayna@linux.ibm.com --- security/integrity/Kconfig | 9 ++ security/integrity/Makefile | 4 +- .../integrity/platform_certs/load_powerpc.c | 96 +++++++++++++++++++ 3 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 security/integrity/platform_certs/load_powerpc.c diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig index 0bae6adb63a9c7..71f0177e8716ee 100644 --- a/security/integrity/Kconfig +++ b/security/integrity/Kconfig @@ -72,6 +72,15 @@ config LOAD_IPL_KEYS depends on S390 def_bool y +config LOAD_PPC_KEYS + bool "Enable loading of platform and blacklisted keys for POWER" + depends on INTEGRITY_PLATFORM_KEYRING + depends on PPC_SECURE_BOOT + default y + help + Enable loading of keys to the .platform keyring and blacklisted + hashes to the .blacklist keyring for powerpc based platforms. + config INTEGRITY_AUDIT bool "Enables integrity auditing support " depends on AUDIT diff --git a/security/integrity/Makefile b/security/integrity/Makefile index 351c9662994b5f..7ee39d66cf166c 100644 --- a/security/integrity/Makefile +++ b/security/integrity/Makefile @@ -14,6 +14,8 @@ integrity-$(CONFIG_LOAD_UEFI_KEYS) += platform_certs/efi_parser.o \ platform_certs/load_uefi.o \ platform_certs/keyring_handler.o integrity-$(CONFIG_LOAD_IPL_KEYS) += platform_certs/load_ipl_s390.o - +integrity-$(CONFIG_LOAD_PPC_KEYS) += platform_certs/efi_parser.o \ + platform_certs/load_powerpc.o \ + platform_certs/keyring_handler.o obj-$(CONFIG_IMA) += ima/ obj-$(CONFIG_EVM) += evm/ diff --git a/security/integrity/platform_certs/load_powerpc.c b/security/integrity/platform_certs/load_powerpc.c new file mode 100644 index 00000000000000..a2900cb8535712 --- /dev/null +++ b/security/integrity/platform_certs/load_powerpc.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + * + * - loads keys and hashes stored and controlled by the firmware. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "keyring_handler.h" + +/* + * Get a certificate list blob from the named secure variable. 
+ */ +static __init void *get_cert_list(u8 *key, unsigned long keylen, uint64_t *size) +{ + int rc; + void *db; + + rc = secvar_ops->get(key, keylen, NULL, size); + if (rc) { + pr_err("Couldn't get size: %d\n", rc); + return NULL; + } + + db = kmalloc(*size, GFP_KERNEL); + if (!db) + return NULL; + + rc = secvar_ops->get(key, keylen, db, size); + if (rc) { + kfree(db); + pr_err("Error reading %s var: %d\n", key, rc); + return NULL; + } + + return db; +} + +/* + * Load the certs contained in the keys databases into the platform trusted + * keyring and the blacklisted X.509 cert SHA256 hashes into the blacklist + * keyring. + */ +static int __init load_powerpc_certs(void) +{ + void *db = NULL, *dbx = NULL; + uint64_t dbsize = 0, dbxsize = 0; + int rc = 0; + struct device_node *node; + + if (!secvar_ops) + return -ENODEV; + + /* The following only applies for the edk2-compat backend. */ + node = of_find_compatible_node(NULL, NULL, "ibm,edk2-compat-v1"); + if (!node) + return -ENODEV; + + /* + * Get db, and dbx. They might not exist, so it isn't an error if we + * can't get them. + */ + db = get_cert_list("db", 3, &dbsize); + if (!db) { + pr_err("Couldn't get db list from firmware\n"); + } else { + rc = parse_efi_signature_list("powerpc:db", db, dbsize, + get_handler_for_db); + if (rc) + pr_err("Couldn't parse db signatures: %d\n", rc); + kfree(db); + } + + dbx = get_cert_list("dbx", 4, &dbxsize); + if (!dbx) { + pr_info("Couldn't get dbx list from firmware\n"); + } else { + rc = parse_efi_signature_list("powerpc:dbx", dbx, dbxsize, + get_handler_for_dbx); + if (rc) + pr_err("Couldn't parse dbx signatures: %d\n", rc); + kfree(dbx); + } + + of_node_put(node); + + return rc; +} +late_initcall(load_powerpc_certs); From 57409d4fb12c185b2c0689e0496878c8f6bb5b58 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Sun, 10 Nov 2019 23:21:28 -0600 Subject: [PATCH 055/144] powerpc/pseries: Fix bad drc_index_start value parsing of drc-info entry The ibm,drc-info property is an array property that contains drc-info entries such that each entry is made up of 2 string encoded elements followed by 5 int encoded elements. The of_read_drc_info_cell() helper contains comments that correctly name the expected elements and their encoding. However, the usage of of_prop_next_string() and of_prop_next_u32() introduced a subtle skippage of the first u32. This is a result of of_prop_next_string() returning a pointer to the next property value which is not a string, but actually a (__be32 *). As, a result the following call to of_prop_next_u32() passes over the current int encoded value and actually stores the next one wrongly. Simply endian swap the current value in place after reading the first two string values. The remaining int encoded values can then be read correctly using of_prop_next_u32(). 
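For reference, the entry layout being walked is sketched below (an
illustrative note, not part of the patch; the encode-string/encode-int
labels follow the comments in of_read_drc_info_cell()):

    /*
     * One ibm,drc-info entry, per PAPR:
     *
     *   drc-type:                    encode-string
     *   drc-name-prefix:             encode-string
     *   drc-index-start:             encode-int
     *   drc-name-suffix-start:       encode-int
     *   number-sequential-elements:  encode-int
     *   sequential-increment:        encode-int
     *   drc-power-domain:            encode-int
     *
     * of_prop_next_string() returns a pointer just past the string it
     * consumed, so after the two strings the cursor already points at
     * drc-index-start, which must be swapped in place:
     *
     *   p2 = (const __be32 *)p;
     *   data->drc_index_start = be32_to_cpu(*p2);
     */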
Signed-off-by: Tyrel Datwyler
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/1573449697-5448-2-git-send-email-tyreld@linux.ibm.com
---
 arch/powerpc/platforms/pseries/of_helpers.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c
index 6df192f38f8005..66dfd825671235 100644
--- a/arch/powerpc/platforms/pseries/of_helpers.c
+++ b/arch/powerpc/platforms/pseries/of_helpers.c
@@ -45,14 +45,14 @@ struct device_node *pseries_of_derive_parent(const char *path)
 int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
 			struct of_drc_info *data)
 {
-	const char *p;
+	const char *p = (char *)(*curval);
 	const __be32 *p2;
 
 	if (!data)
 		return -EINVAL;
 
 	/* Get drc-type:encode-string */
-	p = data->drc_type = (char*) (*curval);
+	data->drc_type = (char *)p;
 	p = of_prop_next_string(*prop, p);
 	if (!p)
 		return -EINVAL;
@@ -65,9 +65,7 @@ int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
 
 	/* Get drc-index-start:encode-int */
 	p2 = (const __be32 *)p;
-	p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start);
-	if (!p2)
-		return -EINVAL;
+	data->drc_index_start = be32_to_cpu(*p2);
 
 	/* Get drc-name-suffix-start:encode-int */
 	p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start);

From 775fa495af04e0bdb3a00085aaa2d915ed51388f Mon Sep 17 00:00:00 2001
From: Tyrel Datwyler
Date: Sun, 10 Nov 2019 23:21:29 -0600
Subject: [PATCH 056/144] powerpc/pseries: Fix drc-info mappings of logical
 cpus to drc-index

There are a couple of subtle errors in the mapping between cpu-ids and
a cpu's associated drc-index when using the new ibm,drc-info property.

The first is that while drc-info may have been a supported firmware
feature at boot, it is possible we have migrated to a CEC with older
firmware that doesn't support the ibm,drc-info property. In that case
the device tree would have been updated after migration to remove the
ibm,drc-info property and replace it with the older style ibm,drc-*
properties for types, indexes, names, and power-domains. PAPR even
goes as far as dictating that if we advertise support for drc-info
we must be capable of supporting either property type at runtime.

The second is that the first value of the ibm,drc-info property is
the int encoded count of drc-info entries. As such, the "value"
returned by of_prop_next_u32() is pointing at that count, and not at
the first element of the first drc-info entry as is expected by the
of_read_drc_info_cell() helper.

Fix the first by ignoring the DRC-INFO firmware feature and instead
testing directly for ibm,drc-info, then falling back to the old
style ibm,drc-indexes in the case it doesn't exist.

Fix the second by incrementing value to the next element prior to
parsing drc-info entries.
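As a sketch only (mirroring the pattern the later patches in this
series apply at each ibm,drc-info call site), the corrected walk is:

    struct of_drc_info drc;
    const __be32 *value;
    u32 count;
    int i;

    /* The first u32 of ibm,drc-info is the number of entries. */
    value = of_prop_next_u32(info, NULL, &count);
    if (!value)
        return -EINVAL;
    value++;    /* step past the count to the first entry */

    for (i = 0; i < count; i++) {
        if (of_read_drc_info_cell(&info, &value, &drc))
            return -EINVAL;
        /* drc now holds one decoded entry */
    }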
Signed-off-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1573449697-5448-3-git-send-email-tyreld@linux.ibm.com --- .../platforms/pseries/pseries_energy.c | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c index a96874f9492f7f..09e98d301db0f3 100644 --- a/arch/powerpc/platforms/pseries/pseries_energy.c +++ b/arch/powerpc/platforms/pseries/pseries_energy.c @@ -36,6 +36,7 @@ static int sysfs_entries; static u32 cpu_to_drc_index(int cpu) { struct device_node *dn = NULL; + struct property *info; int thread_index; int rc = 1; u32 ret = 0; @@ -47,20 +48,18 @@ static u32 cpu_to_drc_index(int cpu) /* Convert logical cpu number to core number */ thread_index = cpu_core_index_of_thread(cpu); - if (firmware_has_feature(FW_FEATURE_DRC_INFO)) { - struct property *info = NULL; + info = of_find_property(dn, "ibm,drc-info", NULL); + if (info) { struct of_drc_info drc; int j; u32 num_set_entries; const __be32 *value; - info = of_find_property(dn, "ibm,drc-info", NULL); - if (info == NULL) - goto err_of_node_put; - value = of_prop_next_u32(info, NULL, &num_set_entries); if (!value) goto err_of_node_put; + else + value++; for (j = 0; j < num_set_entries; j++) { @@ -110,6 +109,7 @@ static u32 cpu_to_drc_index(int cpu) static int drc_index_to_cpu(u32 drc_index) { struct device_node *dn = NULL; + struct property *info; const int *indexes; int thread_index = 0, cpu = 0; int rc = 1; @@ -117,21 +117,18 @@ static int drc_index_to_cpu(u32 drc_index) dn = of_find_node_by_path("/cpus"); if (dn == NULL) goto err; - - if (firmware_has_feature(FW_FEATURE_DRC_INFO)) { - struct property *info = NULL; + info = of_find_property(dn, "ibm,drc-info", NULL); + if (info) { struct of_drc_info drc; int j; u32 num_set_entries; const __be32 *value; - info = of_find_property(dn, "ibm,drc-info", NULL); - if (info == NULL) - goto err_of_node_put; - value = of_prop_next_u32(info, NULL, &num_set_entries); if (!value) goto err_of_node_put; + else + value++; for (j = 0; j < num_set_entries; j++) { From b015f6bc9547dbc056edde7177c7868ca8629c4c Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Sun, 10 Nov 2019 23:21:30 -0600 Subject: [PATCH 057/144] powerpc/pseries: Add cpu DLPAR support for drc-info property Older firmwares provided information about Dynamic Reconfig Connectors (DRC) through several device tree properties, namely ibm,drc-types, ibm,drc-indexes, ibm,drc-names, and ibm,drc-power-domains. New firmwares have the ability to present this same information in a much condensed format through a device tree property called ibm,drc-info. The existing cpu DLPAR hotplug code only understands the older DRC property format when validating the drc-index of a cpu during a hotplug add. This updates those code paths to use the ibm,drc-info property, when present, instead for validation. 
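The validation reduces to walking each entry's arithmetic sequence of
indexes, as the new drc_info_valid_index() does; simplified:

    index = drc.drc_index_start;
    for (j = 0; j < drc.num_sequential_elems; j++) {
        if (drc_index == index)
            return true;
        index += drc.sequential_inc;
    }

Equivalently, drc_index is valid for an entry exactly when
(drc_index - drc_index_start) is a non-negative multiple of
sequential_inc that is less than num_sequential_elems * sequential_inc.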
Signed-off-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1573449697-5448-4-git-send-email-tyreld@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 127 ++++++++++++++++--- 1 file changed, 112 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 8ab24bd7f89c65..3e8cbfe7a80fde 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -465,17 +465,67 @@ static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index) return found; } +static bool drc_info_valid_index(struct device_node *parent, u32 drc_index) +{ + struct property *info; + struct of_drc_info drc; + const __be32 *value; + u32 index; + int count, i, j; + + info = of_find_property(parent, "ibm,drc-info", NULL); + if (!info) + return false; + + value = of_prop_next_u32(info, NULL, &count); + + /* First value of ibm,drc-info is number of drc-info records */ + if (value) + value++; + else + return false; + + for (i = 0; i < count; i++) { + if (of_read_drc_info_cell(&info, &value, &drc)) + return false; + + if (strncmp(drc.drc_type, "CPU", 3)) + break; + + if (drc_index > drc.last_drc_index) + continue; + + index = drc.drc_index_start; + for (j = 0; j < drc.num_sequential_elems; j++) { + if (drc_index == index) + return true; + + index += drc.sequential_inc; + } + } + + return false; +} + static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index) { bool found = false; int rc, index; - index = 0; + if (of_find_property(parent, "ibm,drc-info", NULL)) + return drc_info_valid_index(parent, drc_index); + + /* Note that the format of the ibm,drc-indexes array is + * the number of entries in the array followed by the array + * of drc values so we start looking at index = 1. + */ + index = 1; while (!found) { u32 drc; rc = of_property_read_u32_index(parent, "ibm,drc-indexes", index++, &drc); + if (rc) break; @@ -718,19 +768,52 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove) return rc; } -static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add) +static int find_drc_info_cpus_to_add(struct device_node *cpus, + struct property *info, + u32 *cpu_drcs, u32 cpus_to_add) { - struct device_node *parent; + struct of_drc_info drc; + const __be32 *value; + u32 count, drc_index; int cpus_found = 0; - int index, rc; + int i, j; - parent = of_find_node_by_path("/cpus"); - if (!parent) { - pr_warn("Could not find CPU root node in device tree\n"); - kfree(cpu_drcs); + if (!info) return -1; + + value = of_prop_next_u32(info, NULL, &count); + if (value) + value++; + + for (i = 0; i < count; i++) { + of_read_drc_info_cell(&info, &value, &drc); + if (strncmp(drc.drc_type, "CPU", 3)) + break; + + drc_index = drc.drc_index_start; + for (j = 0; j < drc.num_sequential_elems; j++) { + if (dlpar_cpu_exists(cpus, drc_index)) + continue; + + cpu_drcs[cpus_found++] = drc_index; + + if (cpus_found == cpus_to_add) + return cpus_found; + + drc_index += drc.sequential_inc; + } } + return cpus_found; +} + +static int find_drc_index_cpus_to_add(struct device_node *cpus, + u32 *cpu_drcs, u32 cpus_to_add) +{ + int cpus_found = 0; + int index, rc; + u32 drc_index; + /* Search the ibm,drc-indexes array for possible CPU drcs to * add. 
Note that the format of the ibm,drc-indexes array is * the number of entries in the array followed by the array @@ -738,25 +821,25 @@ static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add) */ index = 1; while (cpus_found < cpus_to_add) { - u32 drc; + rc = of_property_read_u32_index(cpus, "ibm,drc-indexes", + index++, &drc_index); - rc = of_property_read_u32_index(parent, "ibm,drc-indexes", - index++, &drc); if (rc) break; - if (dlpar_cpu_exists(parent, drc)) + if (dlpar_cpu_exists(cpus, drc_index)) continue; - cpu_drcs[cpus_found++] = drc; + cpu_drcs[cpus_found++] = drc_index; } - of_node_put(parent); return cpus_found; } static int dlpar_cpu_add_by_count(u32 cpus_to_add) { + struct device_node *parent; + struct property *info; u32 *cpu_drcs; int cpus_added = 0; int cpus_found; @@ -768,7 +851,21 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add) if (!cpu_drcs) return -EINVAL; - cpus_found = find_dlpar_cpus_to_add(cpu_drcs, cpus_to_add); + parent = of_find_node_by_path("/cpus"); + if (!parent) { + pr_warn("Could not find CPU root node in device tree\n"); + kfree(cpu_drcs); + return -1; + } + + info = of_find_property(parent, "ibm,drc-info", NULL); + if (info) + cpus_found = find_drc_info_cpus_to_add(parent, info, cpu_drcs, cpus_to_add); + else + cpus_found = find_drc_index_cpus_to_add(parent, cpu_drcs, cpus_to_add); + + of_node_put(parent); + if (cpus_found < cpus_to_add) { pr_warn("Failed to find enough CPUs (%d of %d) to add\n", cpus_found, cpus_to_add); From 9723c25f99aff0451cfe6392e1b9fdd99d0bf9f0 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Sun, 10 Nov 2019 23:21:31 -0600 Subject: [PATCH 058/144] PCI: rpaphp: Fix up pointer to first drc-info entry The first entry of the ibm,drc-info property is an int encoded count of the number of drc-info entries that follow. The "value" pointer returned by of_prop_next_u32() is still pointing at the this value when we call of_read_drc_info_cell(), but the helper function expects that value to be pointing at the first element of an entry. Fix up by incrementing the "value" pointer to point at the first element of the first drc-info entry prior. Signed-off-by: Tyrel Datwyler Acked-by: Bjorn Helgaas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1573449697-5448-5-git-send-email-tyreld@linux.ibm.com --- drivers/pci/hotplug/rpaphp_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 18627bb21e9ec5..e3502644a45c36 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -239,6 +239,8 @@ static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name, value = of_prop_next_u32(info, NULL, &entries); if (!value) return -EINVAL; + else + value++; for (j = 0; j < entries; j++) { of_read_drc_info_cell(&info, &value, &drc); From 52e2b0f16574afd082cff0f0e8567b2d9f68c033 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Sun, 10 Nov 2019 23:21:32 -0600 Subject: [PATCH 059/144] PCI: rpaphp: Don't rely on firmware feature to imply drc-info support In the event that the partition is migrated to a platform with older firmware that doesn't support the ibm,drc-info property the device tree is modified to remove the ibm,drc-info property and replace it with the older style ibm,drc-* properties for types, names, indexes, and power-domains. One of the requirements of the drc-info firmware feature is that the client is able to handle both the new property, and old style properties at runtime. 
Therefore we can't rely on the firmware feature
alone to dictate which property is currently present in the device
tree.

Fix this shortcoming by checking explicitly for the ibm,drc-info
property, and falling back to the older ibm,drc-* properties if it
doesn't exist.

Signed-off-by: Tyrel Datwyler
Acked-by: Bjorn Helgaas
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/1573449697-5448-6-git-send-email-tyreld@linux.ibm.com
---
 drivers/pci/hotplug/rpaphp_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index e3502644a45c36..e18e9a0e959c82 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -275,7 +275,7 @@ int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
 		return -EINVAL;
 	}
 
-	if (firmware_has_feature(FW_FEATURE_DRC_INFO))
+	if (of_find_property(dn->parent, "ibm,drc-info", NULL))
 		return rpaphp_check_drc_props_v2(dn, drc_name, drc_type,
 						 *my_index);
 	else

From efeda8fada43783635f0274b0a0fa0d0fb6debb8 Mon Sep 17 00:00:00 2001
From: Tyrel Datwyler
Date: Sun, 10 Nov 2019 23:21:33 -0600
Subject: [PATCH 060/144] PCI: rpaphp: Add drc-info support for hotplug slot
 registration

Split physical PCI slot registration scanning into two routines: one
that supports the old ibm,drc-* properties and one that supports the
new compressed ibm,drc-info property.

Signed-off-by: Tyrel Datwyler
Acked-by: Bjorn Helgaas
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/1573449697-5448-7-git-send-email-tyreld@linux.ibm.com
---
 drivers/pci/hotplug/rpaphp_core.c | 89 ++++++++++++++++++++++++-------
 1 file changed, 69 insertions(+), 20 deletions(-)

diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index e18e9a0e959c82..75d577126144c9 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -328,23 +328,48 @@ static int is_php_dn(struct device_node *dn, const int **indexes,
 	return 1;
 }
 
-/**
- * rpaphp_add_slot -- declare a hotplug slot to the hotplug subsystem.
- * @dn: device node of slot
- *
- * This subroutine will register a hotpluggable slot with the
- * PCI hotplug infrastructure. This routine is typically called
- * during boot time, if the hotplug slots are present at boot time,
- * or is called later, by the dlpar add code, if the slot is
- * being dynamically added during runtime.
- *
- * If the device node points at an embedded (built-in) slot, this
- * routine will just return without doing anything, since embedded
- * slots cannot be hotplugged.
- *
- * To remove a slot, it suffices to call rpaphp_deregister_slot().
- */ -int rpaphp_add_slot(struct device_node *dn) +static int rpaphp_drc_info_add_slot(struct device_node *dn) +{ + struct slot *slot; + struct property *info; + struct of_drc_info drc; + char drc_name[MAX_DRC_NAME_LEN]; + const __be32 *cur; + u32 count; + int retval = 0; + + info = of_find_property(dn, "ibm,drc-info", NULL); + if (!info) + return 0; + + cur = of_prop_next_u32(info, NULL, &count); + if (cur) + cur++; + else + return 0; + + of_read_drc_info_cell(&info, &cur, &drc); + if (!is_php_type(drc.drc_type)) + return 0; + + sprintf(drc_name, "%s%d", drc.drc_name_prefix, drc.drc_name_suffix_start); + + slot = alloc_slot_struct(dn, drc.drc_index_start, drc_name, drc.drc_power_domain); + if (!slot) + return -ENOMEM; + + slot->type = simple_strtoul(drc.drc_type, NULL, 10); + retval = rpaphp_enable_slot(slot); + if (!retval) + retval = rpaphp_register_slot(slot); + + if (retval) + dealloc_slot_struct(slot); + + return retval; +} + +static int rpaphp_drc_add_slot(struct device_node *dn) { struct slot *slot; int retval = 0; @@ -352,9 +377,6 @@ int rpaphp_add_slot(struct device_node *dn) const int *indexes, *names, *types, *power_domains; char *name, *type; - if (!dn->name || strcmp(dn->name, "pci")) - return 0; - /* If this is not a hotplug slot, return without doing anything. */ if (!is_php_dn(dn, &indexes, &names, &types, &power_domains)) return 0; @@ -393,6 +415,33 @@ int rpaphp_add_slot(struct device_node *dn) /* XXX FIXME: reports a failure only if last entry in loop failed */ return retval; } + +/** + * rpaphp_add_slot -- declare a hotplug slot to the hotplug subsystem. + * @dn: device node of slot + * + * This subroutine will register a hotpluggable slot with the + * PCI hotplug infrastructure. This routine is typically called + * during boot time, if the hotplug slots are present at boot time, + * or is called later, by the dlpar add code, if the slot is + * being dynamically added during runtime. + * + * If the device node points at an embedded (built-in) slot, this + * routine will just return without doing anything, since embedded + * slots cannot be hotplugged. + * + * To remove a slot, it suffices to call rpaphp_deregister_slot(). + */ +int rpaphp_add_slot(struct device_node *dn) +{ + if (!dn->name || strcmp(dn->name, "pci")) + return 0; + + if (of_find_property(dn, "ibm,drc-info", NULL)) + return rpaphp_drc_info_add_slot(dn); + else + return rpaphp_drc_add_slot(dn); +} EXPORT_SYMBOL_GPL(rpaphp_add_slot); static void __exit cleanup_slots(void) From 0737686778c6dbe0908d684dd5b9c05b127526ba Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Sun, 10 Nov 2019 23:21:35 -0600 Subject: [PATCH 061/144] PCI: rpaphp: Annotate and correctly byte swap DRC properties The device tree is in big endian format and any properties directly retrieved using OF helpers that don't explicitly byte swap should be annotated. In particular there are several places where we grab the opaque property value for the old ibm,drc-* properties and the ibm,my-drc-index property. Fix this for better static checking by annotating values we know to explicitly big endian, and byte swap where appropriate. 
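The pattern being applied is, in sketch form (taken from the shape of
the hunks that follow):

    const __be32 *my_index;

    /* Device tree cells are big endian: keep the raw pointer
     * annotated as __be32 and swap only at the point of use. */
    my_index = of_get_property(dn, "ibm,my-drc-index", NULL);
    if (!my_index)
        return -EINVAL;

    return rpaphp_check_drc_props_v1(dn, drc_name, drc_type,
                                     be32_to_cpu(*my_index));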
Signed-off-by: Tyrel Datwyler Acked-by: Bjorn Helgaas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1573449697-5448-9-git-send-email-tyreld@linux.ibm.com --- drivers/pci/hotplug/rpaphp_core.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 75d577126144c9..129534c6ad6b43 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -154,11 +154,11 @@ static enum pci_bus_speed get_max_bus_speed(struct slot *slot) return speed; } -static int get_children_props(struct device_node *dn, const int **drc_indexes, - const int **drc_names, const int **drc_types, - const int **drc_power_domains) +static int get_children_props(struct device_node *dn, const __be32 **drc_indexes, + const __be32 **drc_names, const __be32 **drc_types, + const __be32 **drc_power_domains) { - const int *indexes, *names, *types, *domains; + const __be32 *indexes, *names, *types, *domains; indexes = of_get_property(dn, "ibm,drc-indexes", NULL); names = of_get_property(dn, "ibm,drc-names", NULL); @@ -194,8 +194,8 @@ static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name, char *drc_type, unsigned int my_index) { char *name_tmp, *type_tmp; - const int *indexes, *names; - const int *types, *domains; + const __be32 *indexes, *names; + const __be32 *types, *domains; int i, rc; rc = get_children_props(dn->parent, &indexes, &names, &types, &domains); @@ -208,7 +208,7 @@ static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name, /* Iterate through parent properties, looking for my-drc-index */ for (i = 0; i < be32_to_cpu(indexes[0]); i++) { - if ((unsigned int) indexes[i + 1] == my_index) + if (be32_to_cpu(indexes[i + 1]) == my_index) break; name_tmp += (strlen(name_tmp) + 1); @@ -267,7 +267,7 @@ static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name, int rpaphp_check_drc_props(struct device_node *dn, char *drc_name, char *drc_type) { - const unsigned int *my_index; + const __be32 *my_index; my_index = of_get_property(dn, "ibm,my-drc-index", NULL); if (!my_index) { @@ -277,10 +277,10 @@ int rpaphp_check_drc_props(struct device_node *dn, char *drc_name, if (of_find_property(dn->parent, "ibm,drc-info", NULL)) return rpaphp_check_drc_props_v2(dn, drc_name, drc_type, - *my_index); + be32_to_cpu(*my_index)); else return rpaphp_check_drc_props_v1(dn, drc_name, drc_type, - *my_index); + be32_to_cpu(*my_index)); } EXPORT_SYMBOL_GPL(rpaphp_check_drc_props); @@ -311,10 +311,11 @@ static int is_php_type(char *drc_type) * for built-in pci slots (even when the built-in slots are * dlparable.) */ -static int is_php_dn(struct device_node *dn, const int **indexes, - const int **names, const int **types, const int **power_domains) +static int is_php_dn(struct device_node *dn, const __be32 **indexes, + const __be32 **names, const __be32 **types, + const __be32 **power_domains) { - const int *drc_types; + const __be32 *drc_types; int rc; rc = get_children_props(dn, indexes, names, &drc_types, power_domains); @@ -374,7 +375,7 @@ static int rpaphp_drc_add_slot(struct device_node *dn) struct slot *slot; int retval = 0; int i; - const int *indexes, *names, *types, *power_domains; + const __be32 *indexes, *names, *types, *power_domains; char *name, *type; /* If this is not a hotplug slot, return without doing anything. 
*/ From 4f9f2d3d7a434b7f882b72550194c9278f4a3925 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Sun, 10 Nov 2019 23:21:36 -0600 Subject: [PATCH 062/144] PCI: rpaphp: Correctly match ibm, my-drc-index to drc-name when using drc-info The newer ibm,drc-info property is a condensed description of the old ibm,drc-* properties (ie. names, types, indexes, and power-domains). When matching a drc-index to a drc-name we need to verify that the index is within the start and last drc-index range and map it to a drc-name using the drc-name-prefix and logical index. Fix the mapping by checking that the index is within the range of the current drc-info entry, and build the name from the drc-name-prefix concatenated with the starting drc-name-suffix value and the sequential index obtained by subtracting ibm,my-drc-index from this entries drc-start-index. Signed-off-by: Tyrel Datwyler Acked-by: Bjorn Helgaas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1573449697-5448-10-git-send-email-tyreld@linux.ibm.com --- drivers/pci/hotplug/rpaphp_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 129534c6ad6b43..951f7f216fb312 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -248,9 +248,10 @@ static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name, /* Should now know end of current entry */ /* Found it */ - if (my_index <= drc.last_drc_index) { + if (my_index >= drc.drc_index_start && my_index <= drc.last_drc_index) { + int index = my_index - drc.drc_index_start; sprintf(cell_drc_name, "%s%d", drc.drc_name_prefix, - my_index); + drc.drc_name_suffix_start + index); break; } } From 0a87ccd3699983645f54cafd2258514a716b20b8 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Sun, 10 Nov 2019 23:21:37 -0600 Subject: [PATCH 063/144] powerpc/pseries: Enable support for ibm,drc-info property Advertise client support for the PAPR architected ibm,drc-info device tree property during CAS handshake. Fixes: c7a3275e0f9e ("powerpc/pseries: Revert support for ibm,drc-info devtree property") Signed-off-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1573449697-5448-11-git-send-email-tyreld@linux.ibm.com --- arch/powerpc/kernel/prom_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 90987db10974c3..577345382b23f9 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1065,7 +1065,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .reserved2 = 0, .reserved3 = 0, .subprocessors = 1, - .byte22 = OV5_FEAT(OV5_DRMEM_V2), + .byte22 = OV5_FEAT(OV5_DRMEM_V2) | OV5_FEAT(OV5_DRC_INFO), .intarch = 0, .mmu = 0, .hash_ext = 0, From 7d8212747435c534c8d564fbef4541a463c976ff Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 31 Oct 2019 15:29:22 +0100 Subject: [PATCH 064/144] powerpc/pseries/cmm: Implement release() function for sysfs device When unloading the module, one gets ------------[ cut here ]------------ Device 'cmm0' does not have a release() function, it is broken and must be fixed. See Documentation/kobject.txt. WARNING: CPU: 0 PID: 19308 at drivers/base/core.c:1244 .device_release+0xcc/0xf0 ... We only have one static fake device. There is nothing to do when releasing the device (via cmm_exit()). 
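The fix is an empty release() wired up to the static device; sketched
here with the intent made explicit (the hunk follows):

    static void cmm_release_device(struct device *dev)
    {
        /* cmm_dev is static and lives for the lifetime of the
         * module, so there is nothing to free; this stub only
         * satisfies the driver core's release() sanity check. */
    }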
Signed-off-by: David Hildenbrand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191031142933.10779-2-david@redhat.com --- arch/powerpc/platforms/pseries/cmm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index b33251d75927bc..572651a5c87bbd 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -411,6 +411,10 @@ static struct bus_type cmm_subsys = { .dev_name = "cmm", }; +static void cmm_release_device(struct device *dev) +{ +} + /** * cmm_sysfs_register - Register with sysfs * @@ -426,6 +430,7 @@ static int cmm_sysfs_register(struct device *dev) dev->id = 0; dev->bus = &cmm_subsys; + dev->release = cmm_release_device; if ((rc = device_register(dev))) goto subsys_unregister; From 022da223180137b25f070a7d0b1fe114e1e87433 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 31 Oct 2019 15:29:23 +0100 Subject: [PATCH 065/144] powerpc/pseries/cmm: Report errors when registering notifiers fails If we don't set the rc, we will return "0", making it look like we succeeded. Signed-off-by: David Hildenbrand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191031142933.10779-3-david@redhat.com --- arch/powerpc/platforms/pseries/cmm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 572651a5c87bbd..fab049d3ea1eae 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -683,8 +683,12 @@ static int cmm_init(void) if ((rc = cmm_sysfs_register(&cmm_dev))) goto out_reboot_notifier; - if (register_memory_notifier(&cmm_mem_nb) || - register_memory_isolate_notifier(&cmm_mem_isolate_nb)) + rc = register_memory_notifier(&cmm_mem_nb); + if (rc) + goto out_unregister_notifier; + + rc = register_memory_isolate_notifier(&cmm_mem_isolate_nb); + if (rc) goto out_unregister_notifier; if (cmm_disabled) From 68f7a04932bbcd72973fd58b16a817f4bf99171a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 31 Oct 2019 15:29:24 +0100 Subject: [PATCH 066/144] powerpc/pseries/cmm: Cleanup rc handling in cmm_init() No need to initialize rc. Also, let's return 0 directly when succeeding. 
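The resulting flow follows the usual kernel unwind idiom, sketched
below with hypothetical step_one()/step_two()/undo_one() helpers (not
the real cmm functions):

    static int example_init(void)
    {
        int rc;

        rc = step_one();
        if (rc)
            return rc;

        rc = step_two();
        if (rc)
            goto out_undo_one;

        /* success: return 0 directly, rc needs no initializer */
        return 0;

    out_undo_one:
        undo_one();
        return rc;
    }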
Signed-off-by: David Hildenbrand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191031142933.10779-4-david@redhat.com --- arch/powerpc/platforms/pseries/cmm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index fab049d3ea1eae..738eb1681d4055 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -669,7 +669,7 @@ static struct notifier_block cmm_mem_nb = { **/ static int cmm_init(void) { - int rc = -ENOMEM; + int rc; if (!firmware_has_feature(FW_FEATURE_CMO)) return -EOPNOTSUPP; @@ -692,7 +692,7 @@ static int cmm_init(void) goto out_unregister_notifier; if (cmm_disabled) - return rc; + return 0; cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); if (IS_ERR(cmm_thread_ptr)) { @@ -700,8 +700,7 @@ static int cmm_init(void) goto out_unregister_notifier; } - return rc; - + return 0; out_unregister_notifier: unregister_memory_notifier(&cmm_mem_nb); unregister_memory_isolate_notifier(&cmm_mem_isolate_nb); From 4a1745c5bf92232f115e28296475dc42254b1c7d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 31 Oct 2019 15:29:25 +0100 Subject: [PATCH 067/144] powerpc/pseries/cmm: Drop page array We can simply store the pages in a list (page->lru), no need for a separate data structure (+ complicated handling). This is how most other balloon drivers store allocated pages without additional tracking data. For the notifiers, use page_to_pfn() to check if a page is in the applicable range. Use page_to_phys() in plpar_page_set_loaned() and plpar_page_set_active() (I assume due to the __pa() that's the right thing to do). Signed-off-by: David Hildenbrand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191031142933.10779-5-david@redhat.com --- arch/powerpc/platforms/pseries/cmm.c | 163 ++++++--------------------- 1 file changed, 36 insertions(+), 127 deletions(-) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 738eb1681d4055..33d31e48ec1588 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -75,21 +75,13 @@ module_param_named(debug, cmm_debug, uint, 0644); MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. " "[Default=" __stringify(CMM_DEBUG) "]"); -#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long)) - #define cmm_dbg(...) 
if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); } -struct cmm_page_array { - struct cmm_page_array *next; - unsigned long index; - unsigned long page[CMM_NR_PAGES]; -}; - static unsigned long loaned_pages; static unsigned long loaned_pages_target; static unsigned long oom_freed_pages; -static struct cmm_page_array *cmm_page_list; +static LIST_HEAD(cmm_page_list); static DEFINE_SPINLOCK(cmm_lock); static DEFINE_MUTEX(hotplug_mutex); @@ -97,8 +89,9 @@ static int hotplug_occurred; /* protected by the hotplug mutex */ static struct task_struct *cmm_thread_ptr; -static long plpar_page_set_loaned(unsigned long vpa) +static long plpar_page_set_loaned(struct page *page) { + const unsigned long vpa = page_to_phys(page); unsigned long cmo_page_sz = cmo_get_page_size(); long rc = 0; int i; @@ -113,8 +106,9 @@ static long plpar_page_set_loaned(unsigned long vpa) return rc; } -static long plpar_page_set_active(unsigned long vpa) +static long plpar_page_set_active(struct page *page) { + const unsigned long vpa = page_to_phys(page); unsigned long cmo_page_sz = cmo_get_page_size(); long rc = 0; int i; @@ -138,8 +132,7 @@ static long plpar_page_set_active(unsigned long vpa) **/ static long cmm_alloc_pages(long nr) { - struct cmm_page_array *pa, *npa; - unsigned long addr; + struct page *page; long rc; cmm_dbg("Begin request for %ld pages\n", nr); @@ -156,43 +149,20 @@ static long cmm_alloc_pages(long nr) break; } - addr = __get_free_page(GFP_NOIO | __GFP_NOWARN | - __GFP_NORETRY | __GFP_NOMEMALLOC); - if (!addr) + page = alloc_page(GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC); + if (!page) break; spin_lock(&cmm_lock); - pa = cmm_page_list; - if (!pa || pa->index >= CMM_NR_PAGES) { - /* Need a new page for the page list. */ - spin_unlock(&cmm_lock); - npa = (struct cmm_page_array *)__get_free_page( - GFP_NOIO | __GFP_NOWARN | - __GFP_NORETRY | __GFP_NOMEMALLOC); - if (!npa) { - pr_info("%s: Can not allocate new page list\n", __func__); - free_page(addr); - break; - } - spin_lock(&cmm_lock); - pa = cmm_page_list; - - if (!pa || pa->index >= CMM_NR_PAGES) { - npa->next = pa; - npa->index = 0; - pa = npa; - cmm_page_list = pa; - } else - free_page((unsigned long) npa); - } - - if ((rc = plpar_page_set_loaned(__pa(addr)))) { + rc = plpar_page_set_loaned(page); + if (rc) { pr_err("%s: Can not set page to loaned. 
rc=%ld\n", __func__, rc); spin_unlock(&cmm_lock); - free_page(addr); + __free_page(page); break; } - pa->page[pa->index++] = addr; + list_add(&page->lru, &cmm_page_list); loaned_pages++; totalram_pages_dec(); spin_unlock(&cmm_lock); @@ -212,25 +182,16 @@ static long cmm_alloc_pages(long nr) **/ static long cmm_free_pages(long nr) { - struct cmm_page_array *pa; - unsigned long addr; + struct page *page, *tmp; cmm_dbg("Begin free of %ld pages.\n", nr); spin_lock(&cmm_lock); - pa = cmm_page_list; - while (nr) { - if (!pa || pa->index <= 0) + list_for_each_entry_safe(page, tmp, &cmm_page_list, lru) { + if (!nr) break; - addr = pa->page[--pa->index]; - - if (pa->index == 0) { - pa = pa->next; - free_page((unsigned long) cmm_page_list); - cmm_page_list = pa; - } - - plpar_page_set_active(__pa(addr)); - free_page(addr); + plpar_page_set_active(page); + list_del(&page->lru); + __free_page(page); loaned_pages--; nr--; totalram_pages_inc(); @@ -496,20 +457,13 @@ static struct notifier_block cmm_reboot_nb = { static unsigned long cmm_count_pages(void *arg) { struct memory_isolate_notify *marg = arg; - struct cmm_page_array *pa; - unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn); - unsigned long end = start + (marg->nr_pages << PAGE_SHIFT); - unsigned long idx; + struct page *page; spin_lock(&cmm_lock); - pa = cmm_page_list; - while (pa) { - if ((unsigned long)pa >= start && (unsigned long)pa < end) + list_for_each_entry(page, &cmm_page_list, lru) { + if (page_to_pfn(page) >= marg->start_pfn && + page_to_pfn(page) < marg->start_pfn + marg->nr_pages) marg->pages_found++; - for (idx = 0; idx < pa->index; idx++) - if (pa->page[idx] >= start && pa->page[idx] < end) - marg->pages_found++; - pa = pa->next; } spin_unlock(&cmm_lock); return 0; @@ -550,69 +504,24 @@ static struct notifier_block cmm_mem_isolate_nb = { static int cmm_mem_going_offline(void *arg) { struct memory_notify *marg = arg; - unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn); - unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT); - struct cmm_page_array *pa_curr, *pa_last, *npa; - unsigned long idx; + struct page *page, *tmp; unsigned long freed = 0; - cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n", - start_page, marg->nr_pages); + cmm_dbg("Memory going offline, searching PFN 0x%lx (%ld pages).\n", + marg->start_pfn, marg->nr_pages); spin_lock(&cmm_lock); /* Search the page list for pages in the range to be offlined */ - pa_last = pa_curr = cmm_page_list; - while (pa_curr) { - for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) { - if ((pa_curr->page[idx] < start_page) || - (pa_curr->page[idx] >= end_page)) - continue; - - plpar_page_set_active(__pa(pa_curr->page[idx])); - free_page(pa_curr->page[idx]); - freed++; - loaned_pages--; - totalram_pages_inc(); - pa_curr->page[idx] = pa_last->page[--pa_last->index]; - if (pa_last->index == 0) { - if (pa_curr == pa_last) - pa_curr = pa_last->next; - pa_last = pa_last->next; - free_page((unsigned long)cmm_page_list); - cmm_page_list = pa_last; - } - } - pa_curr = pa_curr->next; - } - - /* Search for page list structures in the range to be offlined */ - pa_last = NULL; - pa_curr = cmm_page_list; - while (pa_curr) { - if (((unsigned long)pa_curr >= start_page) && - ((unsigned long)pa_curr < end_page)) { - npa = (struct cmm_page_array *)__get_free_page( - GFP_NOIO | __GFP_NOWARN | - __GFP_NORETRY | __GFP_NOMEMALLOC); - if (!npa) { - spin_unlock(&cmm_lock); - cmm_dbg("Failed to allocate memory for list " - "management. 
Memory hotplug " - "failed.\n"); - return -ENOMEM; - } - memcpy(npa, pa_curr, PAGE_SIZE); - if (pa_curr == cmm_page_list) - cmm_page_list = npa; - if (pa_last) - pa_last->next = npa; - free_page((unsigned long) pa_curr); - freed++; - pa_curr = npa; - } - - pa_last = pa_curr; - pa_curr = pa_curr->next; + list_for_each_entry_safe(page, tmp, &cmm_page_list, lru) { + if (page_to_pfn(page) < marg->start_pfn || + page_to_pfn(page) >= marg->start_pfn + marg->nr_pages) + continue; + plpar_page_set_active(page); + list_del(&page->lru); + __free_page(page); + freed++; + loaned_pages--; + totalram_pages_inc(); } spin_unlock(&cmm_lock); From 287b89773d8172df049f0f4c27946b2ae4ac4b41 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 31 Oct 2019 15:29:26 +0100 Subject: [PATCH 068/144] powerpc/pseries/cmm: Use adjust_managed_page_count() insted of totalram_pages_* adjust_managed_page_count() performs a totalram_pages_add(), but also adjusts the managed pages of the zone. Let's use that instead, similar to virtio-balloon. Use it before freeing a page. Signed-off-by: David Hildenbrand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191031142933.10779-6-david@redhat.com --- arch/powerpc/platforms/pseries/cmm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 33d31e48ec1588..f82c468ca2c4bd 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -164,7 +164,7 @@ static long cmm_alloc_pages(long nr) list_add(&page->lru, &cmm_page_list); loaned_pages++; - totalram_pages_dec(); + adjust_managed_page_count(page, -1); spin_unlock(&cmm_lock); nr--; } @@ -191,10 +191,10 @@ static long cmm_free_pages(long nr) break; plpar_page_set_active(page); list_del(&page->lru); + adjust_managed_page_count(page, 1); __free_page(page); loaned_pages--; nr--; - totalram_pages_inc(); } spin_unlock(&cmm_lock); cmm_dbg("End request with %ld pages unfulfilled\n", nr); @@ -518,10 +518,10 @@ static int cmm_mem_going_offline(void *arg) continue; plpar_page_set_active(page); list_del(&page->lru); + adjust_managed_page_count(page, 1); __free_page(page); freed++; loaned_pages--; - totalram_pages_inc(); } spin_unlock(&cmm_lock); From 7659f5d6448095ef436891c33bdd7c8500620a00 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 31 Oct 2019 15:29:27 +0100 Subject: [PATCH 069/144] powerpc/pseries/cmm: Rip out memory isolate notifier The memory isolate notifier was added to allow to offline memory blocks that contain inflated/"loaned" pages. We can achieve the same using the balloon compaction framework. Get rid of the memory isolate notifier. Also, we can get rid of cmm_mem_going_offline(), as we will never reach that code path now when we have allocated memory in the balloon (allocated pages are unmovable and will no longer be special-cased using the memory isolation notifier). Leave the memory notifier in place, so we can still back off in case memory gets offlined. 
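What remains is just the back-off logic, roughly (simplified from the
cmm_memory_cb() that survives this patch):

    case MEM_GOING_OFFLINE:
        /* pause the balloon while the memory block goes away */
        mutex_lock(&hotplug_mutex);
        hotplug_occurred = 1;
        break;
    case MEM_OFFLINE:
    case MEM_CANCEL_OFFLINE:
        /* offline completed or was aborted: resume ballooning */
        mutex_unlock(&hotplug_mutex);
        break;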
Signed-off-by: David Hildenbrand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191031142933.10779-7-david@redhat.com --- arch/powerpc/platforms/pseries/cmm.c | 97 +--------------------------- 1 file changed, 1 insertion(+), 96 deletions(-) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index f82c468ca2c4bd..29416b621189f7 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -38,12 +38,8 @@ #define CMM_MIN_MEM_MB 256 #define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10)) #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) -/* - * The priority level tries to ensure that this notifier is called as - * late as possible to reduce thrashing in the shared memory pool. - */ + #define CMM_MEM_HOTPLUG_PRI 1 -#define CMM_MEM_ISOLATE_PRI 15 static unsigned int delay = CMM_DEFAULT_DELAY; static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY; @@ -446,90 +442,6 @@ static struct notifier_block cmm_reboot_nb = { .notifier_call = cmm_reboot_notifier, }; -/** - * cmm_count_pages - Count the number of pages loaned in a particular range. - * - * @arg: memory_isolate_notify structure with address range and count - * - * Return value: - * 0 on success - **/ -static unsigned long cmm_count_pages(void *arg) -{ - struct memory_isolate_notify *marg = arg; - struct page *page; - - spin_lock(&cmm_lock); - list_for_each_entry(page, &cmm_page_list, lru) { - if (page_to_pfn(page) >= marg->start_pfn && - page_to_pfn(page) < marg->start_pfn + marg->nr_pages) - marg->pages_found++; - } - spin_unlock(&cmm_lock); - return 0; -} - -/** - * cmm_memory_isolate_cb - Handle memory isolation notifier calls - * @self: notifier block struct - * @action: action to take - * @arg: struct memory_isolate_notify data for handler - * - * Return value: - * NOTIFY_OK or notifier error based on subfunction return value - **/ -static int cmm_memory_isolate_cb(struct notifier_block *self, - unsigned long action, void *arg) -{ - int ret = 0; - - if (action == MEM_ISOLATE_COUNT) - ret = cmm_count_pages(arg); - - return notifier_from_errno(ret); -} - -static struct notifier_block cmm_mem_isolate_nb = { - .notifier_call = cmm_memory_isolate_cb, - .priority = CMM_MEM_ISOLATE_PRI -}; - -/** - * cmm_mem_going_offline - Unloan pages where memory is to be removed - * @arg: memory_notify structure with page range to be offlined - * - * Return value: - * 0 on success - **/ -static int cmm_mem_going_offline(void *arg) -{ - struct memory_notify *marg = arg; - struct page *page, *tmp; - unsigned long freed = 0; - - cmm_dbg("Memory going offline, searching PFN 0x%lx (%ld pages).\n", - marg->start_pfn, marg->nr_pages); - spin_lock(&cmm_lock); - - /* Search the page list for pages in the range to be offlined */ - list_for_each_entry_safe(page, tmp, &cmm_page_list, lru) { - if (page_to_pfn(page) < marg->start_pfn || - page_to_pfn(page) >= marg->start_pfn + marg->nr_pages) - continue; - plpar_page_set_active(page); - list_del(&page->lru); - adjust_managed_page_count(page, 1); - __free_page(page); - freed++; - loaned_pages--; - } - - spin_unlock(&cmm_lock); - cmm_dbg("Released %ld pages in the search range.\n", freed); - - return 0; -} - /** * cmm_memory_cb - Handle memory hotplug notifier calls * @self: notifier block struct @@ -549,7 +461,6 @@ static int cmm_memory_cb(struct notifier_block *self, case MEM_GOING_OFFLINE: mutex_lock(&hotplug_mutex); hotplug_occurred = 1; - ret = cmm_mem_going_offline(arg); break; case MEM_OFFLINE: case MEM_CANCEL_OFFLINE: @@ -596,10 +507,6 @@ 
static int cmm_init(void) if (rc) goto out_unregister_notifier; - rc = register_memory_isolate_notifier(&cmm_mem_isolate_nb); - if (rc) - goto out_unregister_notifier; - if (cmm_disabled) return 0; @@ -612,7 +519,6 @@ static int cmm_init(void) return 0; out_unregister_notifier: unregister_memory_notifier(&cmm_mem_nb); - unregister_memory_isolate_notifier(&cmm_mem_isolate_nb); cmm_unregister_sysfs(&cmm_dev); out_reboot_notifier: unregister_reboot_notifier(&cmm_reboot_nb); @@ -634,7 +540,6 @@ static void cmm_exit(void) unregister_oom_notifier(&cmm_oom_nb); unregister_reboot_notifier(&cmm_reboot_nb); unregister_memory_notifier(&cmm_mem_nb); - unregister_memory_isolate_notifier(&cmm_mem_isolate_nb); cmm_free_pages(loaned_pages); cmm_unregister_sysfs(&cmm_dev); } From 1ef2f06b71792c2efaa4fb4aef8f1fc2a115ee1f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 31 Oct 2019 15:29:28 +0100 Subject: [PATCH 070/144] powerpc/pseries/cmm: Convert loaned_pages to an atomic_long_t When switching to balloon compaction, we want to drop the cmm_lock and completely rely on the balloon compaction list lock internally. loaned_pages is currently protected under the cmm_lock. Note: Right now cmm_alloc_pages() and cmm_free_pages() can be called at the same time, e.g., via the thread and a concurrent OOM notifier. Signed-off-by: David Hildenbrand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191031142933.10779-8-david@redhat.com --- arch/powerpc/platforms/pseries/cmm.c | 35 +++++++++++++++------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 29416b621189f7..3a55dd1fdd3921 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -73,7 +73,7 @@ MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. " #define cmm_dbg(...) 
if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); } -static unsigned long loaned_pages; +static atomic_long_t loaned_pages; static unsigned long loaned_pages_target; static unsigned long oom_freed_pages; @@ -159,7 +159,7 @@ static long cmm_alloc_pages(long nr) } list_add(&page->lru, &cmm_page_list); - loaned_pages++; + atomic_long_inc(&loaned_pages); adjust_managed_page_count(page, -1); spin_unlock(&cmm_lock); nr--; @@ -189,7 +189,7 @@ static long cmm_free_pages(long nr) list_del(&page->lru); adjust_managed_page_count(page, 1); __free_page(page); - loaned_pages--; + atomic_long_dec(&loaned_pages); nr--; } spin_unlock(&cmm_lock); @@ -214,7 +214,7 @@ static int cmm_oom_notify(struct notifier_block *self, cmm_dbg("OOM processing started\n"); nr = cmm_free_pages(nr); - loaned_pages_target = loaned_pages; + loaned_pages_target = atomic_long_read(&loaned_pages); *freed += KB2PAGES(oom_kb) - nr; oom_freed_pages += KB2PAGES(oom_kb) - nr; cmm_dbg("OOM processing complete\n"); @@ -231,10 +231,11 @@ static int cmm_oom_notify(struct notifier_block *self, **/ static void cmm_get_mpp(void) { + const long __loaned_pages = atomic_long_read(&loaned_pages); + const long total_pages = totalram_pages() + __loaned_pages; int rc; struct hvcall_mpp_data mpp_data; signed long active_pages_target, page_loan_request, target; - signed long total_pages = totalram_pages() + loaned_pages; signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE; rc = h_get_mpp(&mpp_data); @@ -243,7 +244,7 @@ static void cmm_get_mpp(void) return; page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE); - target = page_loan_request + (signed long)loaned_pages; + target = page_loan_request + __loaned_pages; if (target < 0 || total_pages < min_mem_pages) target = 0; @@ -264,7 +265,7 @@ static void cmm_get_mpp(void) loaned_pages_target = target; cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n", - page_loan_request, loaned_pages, loaned_pages_target, + page_loan_request, __loaned_pages, loaned_pages_target, oom_freed_pages, totalram_pages()); } @@ -282,6 +283,7 @@ static struct notifier_block cmm_oom_nb = { static int cmm_thread(void *dummy) { unsigned long timeleft; + long __loaned_pages; while (1) { timeleft = msleep_interruptible(delay * 1000); @@ -312,11 +314,12 @@ static int cmm_thread(void *dummy) cmm_get_mpp(); - if (loaned_pages_target > loaned_pages) { - if (cmm_alloc_pages(loaned_pages_target - loaned_pages)) - loaned_pages_target = loaned_pages; - } else if (loaned_pages_target < loaned_pages) - cmm_free_pages(loaned_pages - loaned_pages_target); + __loaned_pages = atomic_long_read(&loaned_pages); + if (loaned_pages_target > __loaned_pages) { + if (cmm_alloc_pages(loaned_pages_target - __loaned_pages)) + loaned_pages_target = __loaned_pages; + } else if (loaned_pages_target < __loaned_pages) + cmm_free_pages(__loaned_pages - loaned_pages_target); } return 0; } @@ -330,7 +333,7 @@ static int cmm_thread(void *dummy) } \ static DEVICE_ATTR(name, 0444, show_##name, NULL) -CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages)); +CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages))); CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target)); static ssize_t show_oom_pages(struct device *dev, @@ -433,7 +436,7 @@ static int cmm_reboot_notifier(struct notifier_block *nb, if (cmm_thread_ptr) kthread_stop(cmm_thread_ptr); cmm_thread_ptr = NULL; - cmm_free_pages(loaned_pages); + cmm_free_pages(atomic_long_read(&loaned_pages)); } return NOTIFY_DONE; } @@ -540,7 
+543,7 @@ static void cmm_exit(void) unregister_oom_notifier(&cmm_oom_nb); unregister_reboot_notifier(&cmm_reboot_nb); unregister_memory_notifier(&cmm_mem_nb); - cmm_free_pages(loaned_pages); + cmm_free_pages(atomic_long_read(&loaned_pages)); cmm_unregister_sysfs(&cmm_dev); } @@ -561,7 +564,7 @@ static int cmm_set_disable(const char *val, const struct kernel_param *kp) if (cmm_thread_ptr) kthread_stop(cmm_thread_ptr); cmm_thread_ptr = NULL; - cmm_free_pages(loaned_pages); + cmm_free_pages(atomic_long_read(&loaned_pages)); } else if (!disable && cmm_disabled) { cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); if (IS_ERR(cmm_thread_ptr)) From fe030c9b85e6783bc52fe86449c0a4b8aa16c753 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 31 Oct 2019 15:29:29 +0100 Subject: [PATCH 071/144] powerpc/pseries/cmm: Implement balloon compaction We can now get rid of the cmm_lock and completely rely on the balloon compaction internals, which now also manage the page list and the lock. Inflated/"loaned" pages are now movable. Memory blocks that contain such pages can get offlined. Also, all such pages will be marked PageOffline() and can therefore be excluded in memory dumps using recent versions of makedumpfile. Don't switch to balloon_page_alloc() yet (due to the GFP_NOIO). Will do that separately to discuss this change in detail. Signed-off-by: David Hildenbrand [mpe: Add isolated_pages-- in cmm_migratepage() as suggested by David] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191031142933.10779-9-david@redhat.com --- arch/powerpc/platforms/pseries/Kconfig | 1 + arch/powerpc/platforms/pseries/cmm.c | 133 ++++++++++++++++++++++--- include/uapi/linux/magic.h | 1 + 3 files changed, 121 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 9e35cddddf7307..595e9f8a65398d 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -108,6 +108,7 @@ config PPC_SMLPAR config CMM tristate "Collaborative memory management" depends on PPC_SMLPAR + select MEMORY_BALLOON default y help Select this option, if you want to enable the kernel interface diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 3a55dd1fdd3921..0663ddad964af9 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -19,6 +19,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -77,13 +81,11 @@ static atomic_long_t loaned_pages; static unsigned long loaned_pages_target; static unsigned long oom_freed_pages; -static LIST_HEAD(cmm_page_list); -static DEFINE_SPINLOCK(cmm_lock); - static DEFINE_MUTEX(hotplug_mutex); static int hotplug_occurred; /* protected by the hotplug mutex */ static struct task_struct *cmm_thread_ptr; +static struct balloon_dev_info b_dev_info; static long plpar_page_set_loaned(struct page *page) { @@ -149,19 +151,16 @@ static long cmm_alloc_pages(long nr) __GFP_NOMEMALLOC); if (!page) break; - spin_lock(&cmm_lock); rc = plpar_page_set_loaned(page); if (rc) { pr_err("%s: Can not set page to loaned. 
rc=%ld\n", __func__, rc); - spin_unlock(&cmm_lock); __free_page(page); break; } - list_add(&page->lru, &cmm_page_list); + balloon_page_enqueue(&b_dev_info, page); atomic_long_inc(&loaned_pages); adjust_managed_page_count(page, -1); - spin_unlock(&cmm_lock); nr--; } @@ -178,21 +177,19 @@ static long cmm_alloc_pages(long nr) **/ static long cmm_free_pages(long nr) { - struct page *page, *tmp; + struct page *page; cmm_dbg("Begin free of %ld pages.\n", nr); - spin_lock(&cmm_lock); - list_for_each_entry_safe(page, tmp, &cmm_page_list, lru) { - if (!nr) + while (nr) { + page = balloon_page_dequeue(&b_dev_info); + if (!page) break; plpar_page_set_active(page); - list_del(&page->lru); adjust_managed_page_count(page, 1); __free_page(page); atomic_long_dec(&loaned_pages); nr--; } - spin_unlock(&cmm_lock); cmm_dbg("End request with %ld pages unfulfilled\n", nr); return nr; } @@ -484,6 +481,106 @@ static struct notifier_block cmm_mem_nb = { .priority = CMM_MEM_HOTPLUG_PRI }; +#ifdef CONFIG_BALLOON_COMPACTION +static struct vfsmount *balloon_mnt; + +static int cmm_init_fs_context(struct fs_context *fc) +{ + return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM; +} + +static struct file_system_type balloon_fs = { + .name = "ppc-cmm", + .init_fs_context = cmm_init_fs_context, + .kill_sb = kill_anon_super, +}; + +static int cmm_migratepage(struct balloon_dev_info *b_dev_info, + struct page *newpage, struct page *page, + enum migrate_mode mode) +{ + unsigned long flags; + + /* + * loan/"inflate" the newpage first. + * + * We might race against the cmm_thread who might discover after our + * loan request that another page is to be unloaned. However, once + * the cmm_thread runs again later, this error will automatically + * be corrected. + */ + if (plpar_page_set_loaned(newpage)) { + /* Unlikely, but possible. Tell the caller not to retry now. */ + pr_err_ratelimited("%s: Cannot set page to loaned.", __func__); + return -EBUSY; + } + + /* balloon page list reference */ + get_page(newpage); + + spin_lock_irqsave(&b_dev_info->pages_lock, flags); + balloon_page_insert(b_dev_info, newpage); + balloon_page_delete(page); + b_dev_info->isolated_pages--; + spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); + + /* + * activate/"deflate" the old page. We ignore any errors just like the + * other callers. 
+ */ + plpar_page_set_active(page); + + /* balloon page list reference */ + put_page(page); + + return MIGRATEPAGE_SUCCESS; +} + +static int cmm_balloon_compaction_init(void) +{ + int rc; + + balloon_devinfo_init(&b_dev_info); + b_dev_info.migratepage = cmm_migratepage; + + balloon_mnt = kern_mount(&balloon_fs); + if (IS_ERR(balloon_mnt)) { + rc = PTR_ERR(balloon_mnt); + balloon_mnt = NULL; + return rc; + } + + b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); + if (IS_ERR(b_dev_info.inode)) { + rc = PTR_ERR(b_dev_info.inode); + b_dev_info.inode = NULL; + kern_unmount(balloon_mnt); + balloon_mnt = NULL; + return rc; + } + + b_dev_info.inode->i_mapping->a_ops = &balloon_aops; + return 0; +} +static void cmm_balloon_compaction_deinit(void) +{ + if (b_dev_info.inode) + iput(b_dev_info.inode); + b_dev_info.inode = NULL; + kern_unmount(balloon_mnt); + balloon_mnt = NULL; +} +#else /* CONFIG_BALLOON_COMPACTION */ +static int cmm_balloon_compaction_init(void) +{ + return 0; +} + +static void cmm_balloon_compaction_deinit(void) +{ +} +#endif /* CONFIG_BALLOON_COMPACTION */ + /** * cmm_init - Module initialization * @@ -497,9 +594,14 @@ static int cmm_init(void) if (!firmware_has_feature(FW_FEATURE_CMO)) return -EOPNOTSUPP; - if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0) + rc = cmm_balloon_compaction_init(); + if (rc) return rc; + rc = register_oom_notifier(&cmm_oom_nb); + if (rc < 0) + goto out_balloon_compaction; + if ((rc = register_reboot_notifier(&cmm_reboot_nb))) goto out_oom_notifier; @@ -527,6 +629,8 @@ static int cmm_init(void) unregister_reboot_notifier(&cmm_reboot_nb); out_oom_notifier: unregister_oom_notifier(&cmm_oom_nb); +out_balloon_compaction: + cmm_balloon_compaction_deinit(); return rc; } @@ -545,6 +649,7 @@ static void cmm_exit(void) unregister_memory_notifier(&cmm_mem_nb); cmm_free_pages(atomic_long_read(&loaned_pages)); cmm_unregister_sysfs(&cmm_dev); + cmm_balloon_compaction_deinit(); } /** diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 903cc2d2750b24..3ac436376d79b6 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -94,5 +94,6 @@ #define ZSMALLOC_MAGIC 0x58295829 #define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */ #define Z3FOLD_MAGIC 0x33 +#define PPC_CMM_MAGIC 0xc7571590 #endif /* __LINUX_MAGIC_H__ */ From e8decafefb67794ba30fddf7d9e90a247b5aa172 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 31 Oct 2019 15:29:30 +0100 Subject: [PATCH 072/144] powerpc/pseries/cmm: Switch to balloon_page_alloc() balloon_page_alloc() will use GFP_HIGHUSER_MOVABLE in case we have CONFIG_BALLOON_COMPACTION. This is now possible, as balloon pages are movable with CONFIG_BALLOON_COMPACTION. Without CONFIG_BALLOON_COMPACTION, GFP_HIGHUSER is used. Note that apart from that, balloon_page_alloc() uses the following flags: __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN And current code used: GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC GFP_HIGHUSER/GFP_HIGHUSER_MOVABLE include __GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL | __GFP_HIGHMEM GFP_NOIO is __GFP_RECLAIM. With CONFIG_BALLOON_COMPACTION, we essentially add: __GFP_IO | __GFP_FS | __GFP_HARDWALL | __GFP_HIGHMEM | __GFP_MOVABLE Without CONFIG_BALLOON_COMPACTION, we essentially add: __GFP_IO | __GFP_FS | __GFP_HARDWALL | __GFP_HIGHMEM I assume this is fine, as this is what all other balloon compaction users use. If it turns out to be a problem, we could add __GFP_MOVABLE manually if we have CONFIG_BALLOON_COMPACTION. 
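Should the GFP change ever turn out to be a problem, the fallback mentioned above would presumably look something like this (a hypothetical sketch, not part of this patch; cmm_alloc_balloon_page() is an invented name):

	static struct page *cmm_alloc_balloon_page(void)
	{
		/* The flags the driver used before this patch. */
		gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY |
			    __GFP_NOMEMALLOC;

		/* Inflated pages are only movable with balloon compaction. */
		if (IS_ENABLED(CONFIG_BALLOON_COMPACTION))
			gfp |= __GFP_MOVABLE;

		return alloc_page(gfp);
	}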
Signed-off-by: David Hildenbrand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191031142933.10779-10-david@redhat.com --- arch/powerpc/platforms/pseries/cmm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 0663ddad964af9..86eb845b4737de 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -147,8 +147,7 @@ static long cmm_alloc_pages(long nr) break; } - page = alloc_page(GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | - __GFP_NOMEMALLOC); + page = balloon_page_alloc(); if (!page) break; rc = plpar_page_set_loaned(page); From b1713975c31ae20ecc40fd00191ee3fa51445d4a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 31 Oct 2019 15:29:31 +0100 Subject: [PATCH 073/144] powerpc/pseries/cmm: Simulation mode Let's allow testing the implementation without needing HW support. When "simulate=1" is specified while loading the module, we bypass all HW checks and HW calls. The sysfs file "simulate_loan_target_kb" can be used to simulate HW requests. The simulation mode can be activated using: modprobe cmm debug=1 simulate=1 And the requested loan target can be changed using: echo X > /sys/devices/system/cmm/cmm0/simulate_loan_target_kb Signed-off-by: David Hildenbrand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191031142933.10779-11-david@redhat.com --- arch/powerpc/platforms/pseries/cmm.c | 38 ++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 86eb845b4737de..91571841df8a70 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -51,6 +51,8 @@ static unsigned int oom_kb = CMM_OOM_KB; static unsigned int cmm_debug = CMM_DEBUG; static unsigned int cmm_disabled = CMM_DISABLE; static unsigned long min_mem_mb = CMM_MIN_MEM_MB; +static bool __read_mostly simulate; +static unsigned long simulate_loan_target_kb; static struct device cmm_dev; MODULE_AUTHOR("Brian King "); @@ -74,6 +76,8 @@ MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. " module_param_named(debug, cmm_debug, uint, 0644); MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. " "[Default=" __stringify(CMM_DEBUG) "]"); +module_param_named(simulate, simulate, bool, 0444); +MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw)."); #define cmm_dbg(...)
if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); } @@ -94,6 +98,9 @@ static long plpar_page_set_loaned(struct page *page) long rc = 0; int i; + if (unlikely(simulate)) + return 0; + for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0); @@ -111,6 +118,9 @@ static long plpar_page_set_active(struct page *page) long rc = 0; int i; + if (unlikely(simulate)) + return 0; + for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0); @@ -234,13 +244,17 @@ static void cmm_get_mpp(void) signed long active_pages_target, page_loan_request, target; signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE; - rc = h_get_mpp(&mpp_data); - - if (rc != H_SUCCESS) - return; - - page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE); - target = page_loan_request + __loaned_pages; + if (likely(!simulate)) { + rc = h_get_mpp(&mpp_data); + if (rc != H_SUCCESS) + return; + page_loan_request = div_s64((s64)mpp_data.loan_request, + PAGE_SIZE); + target = page_loan_request + __loaned_pages; + } else { + target = KB2PAGES(simulate_loan_target_kb); + page_loan_request = target - __loaned_pages; + } if (target < 0 || total_pages < min_mem_pages) target = 0; @@ -362,6 +376,9 @@ static struct device_attribute *cmm_attrs[] = { &dev_attr_oom_freed_kb, }; +static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644, + simulate_loan_target_kb); + static struct bus_type cmm_subsys = { .name = "cmm", .dev_name = "cmm", @@ -396,6 +413,11 @@ static int cmm_sysfs_register(struct device *dev) goto fail; } + if (!simulate) + return 0; + rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr); + if (rc) + goto fail; return 0; fail: @@ -590,7 +612,7 @@ static int cmm_init(void) { int rc; - if (!firmware_has_feature(FW_FEATURE_CMO)) + if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate) return -EOPNOTSUPP; rc = cmm_balloon_compaction_init(); From 9f0acf9f80ad504573e6482fb00b53866a9b9d2f Mon Sep 17 00:00:00 2001 From: Chris Smart Date: Sun, 3 Nov 2019 23:33:56 +0000 Subject: [PATCH 074/144] powerpc/crypto: Add cond_resched() in crc-vpmsum self-test The stress test for vpmsum implementations executes a long for loop in the kernel. This blocks the scheduler, which prevents other tasks from running, resulting in a warning. This fix adds a call to cond_resched() at the end of each loop iteration, which allows the scheduler to run other tasks as required. Signed-off-by: Chris Smart Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191103233356.5472-1-chris.smart@humanservices.gov.au --- arch/powerpc/crypto/crc-vpmsum_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/crypto/crc-vpmsum_test.c b/arch/powerpc/crypto/crc-vpmsum_test.c index 47985219a68f69..dce86e75f1a8a4 100644 --- a/arch/powerpc/crypto/crc-vpmsum_test.c +++ b/arch/powerpc/crypto/crc-vpmsum_test.c @@ -103,6 +103,7 @@ static int __init crc_test_init(void) crc32, verify32, len); break; } + cond_resched(); } pr_info("crc-vpmsum_test done, completed %lu iterations\n", i); } while (0); From 4e706af3cd8e1d0503c25332b30cad33c97ed442 Mon Sep 17 00:00:00 2001 From: "Gustavo L. F. Walbon" Date: Thu, 2 May 2019 18:09:07 -0300 Subject: [PATCH 075/144] powerpc/security: Fix wrong message when RFI Flush is disabled The issue was that sysfs showed the "Mitigation" message whatever the state of "RFI Flush", but it should show "Vulnerable" when RFI Flush is disabled.
If you have "L1D private" feature enabled and not "RFI Flush" you are vulnerable to meltdown attacks. "RFI Flush" is the key feature to mitigate the meltdown whatever the "L1D private" state. SEC_FTR_L1D_THREAD_PRIV is a feature for Power9 only. So the message should be as the truth table shows: CPU | L1D private | RFI Flush | sysfs ----|-------------|-----------|------------------------------------- P9 | False | False | Vulnerable P9 | False | True | Mitigation: RFI Flush P9 | True | False | Vulnerable: L1D private per thread P9 | True | True | Mitigation: RFI Flush, L1D private per thread P8 | False | False | Vulnerable P8 | False | True | Mitigation: RFI Flush Output before this fix: # cat /sys/devices/system/cpu/vulnerabilities/meltdown Mitigation: RFI Flush, L1D private per thread # echo 0 > /sys/kernel/debug/powerpc/rfi_flush # cat /sys/devices/system/cpu/vulnerabilities/meltdown Mitigation: L1D private per thread Output after fix: # cat /sys/devices/system/cpu/vulnerabilities/meltdown Mitigation: RFI Flush, L1D private per thread # echo 0 > /sys/kernel/debug/powerpc/rfi_flush # cat /sys/devices/system/cpu/vulnerabilities/meltdown Vulnerable: L1D private per thread Signed-off-by: Gustavo L. F. Walbon Signed-off-by: Mauro S. M. Rodrigues Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190502210907.42375-1-gwalbon@linux.ibm.com --- arch/powerpc/kernel/security.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index a3021e6faed865..faff8c2a0e2fcd 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -141,26 +141,22 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV); - if (rfi_flush || thread_priv) { + if (rfi_flush) { struct seq_buf s; seq_buf_init(&s, buf, PAGE_SIZE - 1); - seq_buf_printf(&s, "Mitigation: "); - - if (rfi_flush) - seq_buf_printf(&s, "RFI Flush"); - - if (rfi_flush && thread_priv) - seq_buf_printf(&s, ", "); - + seq_buf_printf(&s, "Mitigation: RFI Flush"); if (thread_priv) - seq_buf_printf(&s, "L1D private per thread"); + seq_buf_printf(&s, ", L1D private per thread"); seq_buf_printf(&s, "\n"); return s.len; } + if (thread_priv) + return sprintf(buf, "Vulnerable: L1D private per thread\n"); + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) return sprintf(buf, "Not affected\n"); From b811be615cb78c90fca42bbd5b958427d03ba7e0 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 17 Oct 2019 15:01:58 +0530 Subject: [PATCH 076/144] powerpc/watchpoint: Introduce macros for watchpoint length We are hadrcoding length everywhere in the watchpoint code. Introduce macros for the length and use them. 
Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191017093204.7511-2-ravi.bangoria@linux.ibm.com --- arch/powerpc/include/asm/hw_breakpoint.h | 3 +++ arch/powerpc/kernel/hw_breakpoint.c | 4 ++-- arch/powerpc/kernel/ptrace.c | 6 +++--- arch/powerpc/xmon/xmon.c | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 67e2da195eae4f..4a887e85a5f49d 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -33,6 +33,9 @@ struct arch_hw_breakpoint { #define HW_BRK_TYPE_PRIV_ALL (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \ HW_BRK_TYPE_HYP) +#define DABR_MAX_LEN 8 +#define DAWR_MAX_LEN 512 + #ifdef CONFIG_HAVE_HW_BREAKPOINT #include #include diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 1007ec36b4cb91..677041cb3c3e76 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -163,9 +163,9 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, */ if (!ppc_breakpoint_available()) return -ENODEV; - length_max = 8; /* DABR */ + length_max = DABR_MAX_LEN; /* DABR */ if (dawr_enabled()) { - length_max = 512 ; /* 64 doublewords */ + length_max = DAWR_MAX_LEN; /* 64 doublewords */ /* DAWR region can't cross 512 boundary */ if ((attr->bp_addr >> 9) != ((attr->bp_addr + attr->bp_len - 1) >> 9)) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 76724a023b9be8..eba39a5f62942c 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -2425,7 +2425,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, return -EIO; hw_brk.address = data & (~HW_BRK_TYPE_DABR); hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; - hw_brk.len = 8; + hw_brk.len = DABR_MAX_LEN; set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR); #ifdef CONFIG_HAVE_HW_BREAKPOINT bp = thread->ptrace_bps[0]; @@ -2456,7 +2456,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Create a new breakpoint request if one doesn't exist already */ hw_breakpoint_init(&attr); attr.bp_addr = hw_brk.address; - attr.bp_len = 8; + attr.bp_len = DABR_MAX_LEN; arch_bp_generic_fields(hw_brk.type, &attr.bp_type); @@ -2882,7 +2882,7 @@ static long ppc_set_hwdebug(struct task_struct *child, brk.address = bp_info->addr & ~7UL; brk.type = HW_BRK_TYPE_TRANSLATE; - brk.len = 8; + brk.len = DABR_MAX_LEN; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) brk.type |= HW_BRK_TYPE_READ; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0a438b51dbb52a..a7056049709eff 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -933,7 +933,7 @@ static void insert_cpu_bpts(void) if (dabr.enabled) { brk.address = dabr.address; brk.type = (dabr.enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; - brk.len = 8; + brk.len = DABR_MAX_LEN; __set_breakpoint(&brk); } From b57aeab811db07295f646808b1b17c312d17f57d Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 17 Oct 2019 15:01:59 +0530 Subject: [PATCH 077/144] powerpc/watchpoint: Fix length calculation for unaligned target Watchpoint match range is always doubleword (8 bytes) aligned on powerpc. If the given range crosses a doubleword boundary, we need to increase the length such that the next doubleword also gets covered.
Ex, address len = 6 bytes |=========. |------------v--|------v--------| | | | | | | | | | | | | | | | | | |---------------|---------------| <---8 bytes---> In such a case, the current code configures the hw as: start_addr = address & ~HW_BREAKPOINT_ALIGN len = 8 bytes And thus reads/writes in the last 4 bytes of the given range are ignored. Fix this by including the next doubleword in the length. Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191017093204.7511-3-ravi.bangoria@linux.ibm.com --- arch/powerpc/include/asm/hw_breakpoint.h | 2 + arch/powerpc/kernel/dawr.c | 6 +-- arch/powerpc/kernel/hw_breakpoint.c | 67 +++++++++++++++++------- arch/powerpc/kernel/process.c | 3 ++ arch/powerpc/kernel/ptrace.c | 1 + 5 files changed, 56 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 4a887e85a5f49d..ea91ac7f5a273d 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -14,6 +14,7 @@ struct arch_hw_breakpoint { unsigned long address; u16 type; u16 len; /* length of the target data symbol */ + u16 hw_len; /* length programmed in hw */ }; /* Note: Don't change the the first 6 bits below as they are in the same order @@ -73,6 +74,7 @@ static inline void hw_breakpoint_disable(void) brk.address = 0; brk.type = 0; brk.len = 0; + brk.hw_len = 0; if (ppc_breakpoint_available()) __set_breakpoint(&brk); } diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c index 5f66b95b68588a..cc14aa6c4a1bb4 100644 --- a/arch/powerpc/kernel/dawr.c +++ b/arch/powerpc/kernel/dawr.c @@ -30,10 +30,10 @@ int set_dawr(struct arch_hw_breakpoint *brk) * DAWR length is stored in field MDR bits 48:53. Matches range in * doublewords (64 bits) baised by -1 eg. 0b000000=1DW and * 0b111111=64DW. - * brk->len is in bytes. + * brk->hw_len is in bytes. * This aligns up to double word size, shifts and does the bias. */ - mrd = ((brk->len + 7) >> 3) - 1; + mrd = ((brk->hw_len + 7) >> 3) - 1; dawrx |= (mrd & 0x3f) << (63 - 53); if (ppc_md.set_dawr) @@ -54,7 +54,7 @@ static ssize_t dawr_write_file_bool(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { - struct arch_hw_breakpoint null_brk = {0, 0, 0}; + struct arch_hw_breakpoint null_brk = {0}; size_t rc; /* Send error to user if they hypervisor won't allow us to write DAWR */ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 677041cb3c3e76..f36274d426ed20 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -126,6 +126,49 @@ int arch_bp_generic_fields(int type, int *gen_bp_type) return 0; } +/* + * Watchpoint match range is always doubleword(8 bytes) aligned on + * powerpc. If the given range is crossing doubleword boundary, we + * need to increase the length such that next doubleword also get + * covered. Ex, + * + * address len = 6 bytes + * |=========. + * |------------v--|------v--------| + * | | | | | | | | | | | | | | | | | + * |---------------|---------------| + * <---8 bytes---> + * + * In this case, we should configure hw as: + * start_addr = address & ~HW_BREAKPOINT_ALIGN + * len = 16 bytes + * + * @start_addr and @end_addr are inclusive.
+ */ +static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw) +{ + u16 max_len = DABR_MAX_LEN; + u16 hw_len; + unsigned long start_addr, end_addr; + + start_addr = hw->address & ~HW_BREAKPOINT_ALIGN; + end_addr = (hw->address + hw->len - 1) | HW_BREAKPOINT_ALIGN; + hw_len = end_addr - start_addr + 1; + + if (dawr_enabled()) { + max_len = DAWR_MAX_LEN; + /* DAWR region can't cross 512 bytes boundary */ + if ((start_addr >> 9) != (end_addr >> 9)) + return -EINVAL; + } + + if (hw_len > max_len) + return -EINVAL; + + hw->hw_len = hw_len; + return 0; +} + /* * Validate the arch-specific HW Breakpoint register settings */ @@ -133,9 +176,9 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, const struct perf_event_attr *attr, struct arch_hw_breakpoint *hw) { - int ret = -EINVAL, length_max; + int ret = -EINVAL; - if (!bp) + if (!bp || !attr->bp_len) return ret; hw->type = HW_BRK_TYPE_TRANSLATE; @@ -155,26 +198,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, hw->address = attr->bp_addr; hw->len = attr->bp_len; - /* - * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8) - * and breakpoint addresses are aligned to nearest double-word - * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the - * 'symbolsize' should satisfy the check below. - */ if (!ppc_breakpoint_available()) return -ENODEV; - length_max = DABR_MAX_LEN; /* DABR */ - if (dawr_enabled()) { - length_max = DAWR_MAX_LEN; /* 64 doublewords */ - /* DAWR region can't cross 512 boundary */ - if ((attr->bp_addr >> 9) != - ((attr->bp_addr + attr->bp_len - 1) >> 9)) - return -EINVAL; - } - if (hw->len > - (length_max - (hw->address & HW_BREAKPOINT_ALIGN))) - return -EINVAL; - return 0; + + return hw_breakpoint_validate_len(hw); } /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 639ceae7da9d81..4df94b6e2f3229 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -715,6 +715,8 @@ static void set_debug_reg_defaults(struct thread_struct *thread) { thread->hw_brk.address = 0; thread->hw_brk.type = 0; + thread->hw_brk.len = 0; + thread->hw_brk.hw_len = 0; if (ppc_breakpoint_available()) set_breakpoint(&thread->hw_brk); } @@ -816,6 +818,7 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a, return false; if (a->len != b->len) return false; + /* no need to check hw_len. it's calculated from address and len */ return true; } diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index eba39a5f62942c..c2dc93157b991b 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -2426,6 +2426,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, hw_brk.address = data & (~HW_BRK_TYPE_DABR); hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; hw_brk.len = DABR_MAX_LEN; + hw_brk.hw_len = DABR_MAX_LEN; set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR); #ifdef CONFIG_HAVE_HW_BREAKPOINT bp = thread->ptrace_bps[0]; From c3f68b0478e7c07769394d17ebde0626600a7e1d Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 17 Oct 2019 15:02:00 +0530 Subject: [PATCH 078/144] powerpc/watchpoint: Fix ptrace code that mucks around with address/len ptrace_set_debugreg() does not consider the new length while overwriting the watchpoint. Fix that. ppc_set_hwdebug() aligns the watchpoint address to a doubleword boundary but does not change the length. If the address range crosses a doubleword boundary and the length is less than 8, we will lose samples from the second doubleword. So fix that as well.
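As a worked example of the doubleword math (mirroring hw_breakpoint_validate_len() from the previous patch): for addr = 0x1005 and len = 6, the watched bytes are 0x1005..0x100a, which cross the doubleword boundary at 0x1008:

	start  = addr & ~HW_BREAKPOINT_ALIGN;			/* 0x1000 */
	end    = (addr + len - 1) | HW_BREAKPOINT_ALIGN;	/* 0x100f */
	hw_len = end - start + 1;				/* 16, not 8 */

so the hardware must be programmed with a 16-byte range rather than the 8 bytes the old ptrace path kept.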
Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191017093204.7511-4-ravi.bangoria@linux.ibm.com --- arch/powerpc/include/asm/hw_breakpoint.h | 4 ++-- arch/powerpc/kernel/ptrace.c | 9 +++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index ea91ac7f5a273d..27ac6f5d28918b 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -34,6 +34,8 @@ struct arch_hw_breakpoint { #define HW_BRK_TYPE_PRIV_ALL (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \ HW_BRK_TYPE_HYP) +#define HW_BREAKPOINT_ALIGN 0x7 + #define DABR_MAX_LEN 8 #define DAWR_MAX_LEN 512 @@ -48,8 +50,6 @@ struct pmu; struct perf_sample_data; struct task_struct; -#define HW_BREAKPOINT_ALIGN 0x7 - extern int hw_breakpoint_slots(int type); extern int arch_bp_generic_fields(int type, int *gen_bp_type); extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index c2dc93157b991b..25c0424e8868b9 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -2440,6 +2440,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, if (bp) { attr = bp->attr; attr.bp_addr = hw_brk.address; + attr.bp_len = DABR_MAX_LEN; arch_bp_generic_fields(hw_brk.type, &attr.bp_type); /* Enable breakpoint */ @@ -2881,7 +2882,7 @@ static long ppc_set_hwdebug(struct task_struct *child, if ((unsigned long)bp_info->addr >= TASK_SIZE) return -EIO; - brk.address = bp_info->addr & ~7UL; + brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN; brk.type = HW_BRK_TYPE_TRANSLATE; brk.len = DABR_MAX_LEN; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) @@ -2889,10 +2890,6 @@ static long ppc_set_hwdebug(struct task_struct *child, if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) brk.type |= HW_BRK_TYPE_WRITE; #ifdef CONFIG_HAVE_HW_BREAKPOINT - /* - * Check if the request is for 'range' breakpoints. We can - * support it if range < 8 bytes. - */ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) len = bp_info->addr2 - bp_info->addr; else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) @@ -2905,7 +2902,7 @@ static long ppc_set_hwdebug(struct task_struct *child, /* Create a new breakpoint request if one doesn't exist already */ hw_breakpoint_init(&attr); - attr.bp_addr = (unsigned long)bp_info->addr & ~HW_BREAKPOINT_ALIGN; + attr.bp_addr = (unsigned long)bp_info->addr; attr.bp_len = len; arch_bp_generic_fields(brk.type, &attr.bp_type); From 27985b2a640e24ce51810aadd0e93d5e0833c9b7 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 17 Oct 2019 15:02:01 +0530 Subject: [PATCH 079/144] powerpc/watchpoint: Don't ignore extraneous exceptions blindly On powerpc, watchpoint match range is double-word granular. On a watchpoint hit, DAR is set to the first byte of overlap between the actual access and the watched range. It is thus quite possible that DAR does not point inside the user-specified range. For example, say the user creates a watchpoint with address range 0x1004 to 0x1007, so the hw would be configured to watch from 0x1000 to 0x1007. If there is a 4-byte access from 0x1002 to 0x1005, DAR will point to 0x1002 and the interrupt handler therefore considers it extraneous, but it's actually not, because part of the access belongs to what the user has asked for.
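With those numbers, the required check is an interval-overlap test rather than a point-in-range test (a sketch mirroring the dar_range_overlaps() helper added below):

	/* watch: 0x1004..0x1007, access: dar = 0x1002, size = 4 */
	overlaps = (dar <= info->address + info->len - 1) &&
		   (dar + size - 1 >= info->address);
	/* 0x1002 <= 0x1007 && 0x1005 >= 0x1004 -> true: not extraneous */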
Instead of blindly ignoring the exception, get the actual address range by analysing the instruction, and ignore the exception only if the actual range does not overlap with the user-specified range. Note: The behavior is unchanged for 8xx. Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191017093204.7511-5-ravi.bangoria@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint.c | 52 +++++++++++++++++------------ 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index f36274d426ed20..58ce3d37c2a309 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -222,33 +222,49 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) tsk->thread.last_hit_ubp = NULL; } -static bool is_larx_stcx_instr(struct pt_regs *regs, unsigned int instr) +static bool dar_within_range(unsigned long dar, struct arch_hw_breakpoint *info) { - int ret, type; - struct instruction_op op; + return ((info->address <= dar) && (dar - info->address < info->len)); +} - ret = analyse_instr(&op, regs, instr); - type = GETTYPE(op.type); - return (!ret && (type == LARX || type == STCX)); +static bool +dar_range_overlaps(unsigned long dar, int size, struct arch_hw_breakpoint *info) +{ + return ((dar <= info->address + info->len - 1) && + (dar + size - 1 >= info->address)); } /* * Handle debug exception notifications. */ static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp, - unsigned long addr) + struct arch_hw_breakpoint *info) { unsigned int instr = 0; + int ret, type, size; + struct instruction_op op; + unsigned long addr = info->address; if (__get_user_inatomic(instr, (unsigned int *)regs->nip)) goto fail; - if (is_larx_stcx_instr(regs, instr)) { + ret = analyse_instr(&op, regs, instr); + type = GETTYPE(op.type); + size = GETSIZE(op.type); + + if (!ret && (type == LARX || type == STCX)) { printk_ratelimited("Breakpoint hit on instruction that can't be emulated." " Breakpoint at 0x%lx will be disabled.\n", addr); goto disable; } + /* + * If it's extraneous event, we still need to emulate/single- + * step the instruction, but we don't generate an event. + */ + if (size && !dar_range_overlaps(regs->dar, size, info)) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + /* Do not emulate user-space instructions, instead single-step them */ if (user_mode(regs)) { current->thread.last_hit_ubp = bp; @@ -280,7 +296,6 @@ int hw_breakpoint_handler(struct die_args *args) struct perf_event *bp; struct pt_regs *regs = args->regs; struct arch_hw_breakpoint *info; - unsigned long dar = regs->dar; /* Disable breakpoints during exception handling */ hw_breakpoint_disable(); @@ -312,19 +327,14 @@ int hw_breakpoint_handler(struct die_args *args) goto out; } - /* - * Verify if dar lies within the address range occupied by the symbol - * being watched to filter extraneous exceptions. If it doesn't, - * we still need to single-step the instruction, but we don't - * generate an event.
- */ info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (!((bp->attr.bp_addr <= dar) && - (dar - bp->attr.bp_addr < bp->attr.bp_len))) - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - - if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info->address)) - goto out; + if (IS_ENABLED(CONFIG_PPC_8xx)) { + if (!dar_within_range(regs->dar, info)) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + } else { + if (!stepping_handler(regs, bp, info)) + goto out; + } /* * As a policy, the callback is invoked in a 'trigger-after-execute' From c2837acfbf390c49056b4b412b65ce39977c15df Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 17 Oct 2019 15:02:02 +0530 Subject: [PATCH 080/144] selftests/powerpc: Rewrite ptrace-hwbreak.c selftest The ptrace-hwbreak.c selftest is logically broken. On powerpc, when a watchpoint is created with ptrace, signals are generated before executing the instruction and the user has to manually single-step the instruction with the watchpoint disabled, which the selftest never does, and thus it keeps getting the signal at the same instruction. If we fix that, the selftest fails because the logical connection between the tracer (parent) and the tracee (child) is also broken. Rewrite the selftest and add new tests for unaligned access. With patch: $ ./tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak test: ptrace-hwbreak tags: git_version:powerpc-5.3-4-224-g218b868240c7-dirty PTRACE_SET_DEBUGREG, WO, len: 1: Ok PTRACE_SET_DEBUGREG, WO, len: 2: Ok PTRACE_SET_DEBUGREG, WO, len: 4: Ok PTRACE_SET_DEBUGREG, WO, len: 8: Ok PTRACE_SET_DEBUGREG, RO, len: 1: Ok PTRACE_SET_DEBUGREG, RO, len: 2: Ok PTRACE_SET_DEBUGREG, RO, len: 4: Ok PTRACE_SET_DEBUGREG, RO, len: 8: Ok PTRACE_SET_DEBUGREG, RW, len: 1: Ok PTRACE_SET_DEBUGREG, RW, len: 2: Ok PTRACE_SET_DEBUGREG, RW, len: 4: Ok PTRACE_SET_DEBUGREG, RW, len: 8: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO, len: 6: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO, len: 6: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RW, len: 6: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, WO, len: 6: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RO, len: 6: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RW, len: 6: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE, RW, len: 6: Ok PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN, RW, len: 512: Ok success: ptrace-hwbreak Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191017093204.7511-6-ravi.bangoria@linux.ibm.com --- .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 571 +++++++++++------- 1 file changed, 361 insertions(+), 210 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index 3066d310f32b64..916e97f5f8b1aa 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -22,318 +22,469 @@ #include #include "ptrace.h" -/* Breakpoint access modes */ -enum { - BP_X = 1, - BP_RW = 2, - BP_W = 4, -}; - -static pid_t child_pid; -static struct ppc_debug_info dbginfo; - -static void get_dbginfo(void) -{ - int ret; - - ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo); - if (ret) { - perror("Can't get breakpoint info\n"); - exit(-1); - } -} - -static bool hwbreak_present(void) -{ - return (dbginfo.num_data_bps != 0); -} +/* + * Use
volatile on all global var so that compiler doesn't + * optimise their load/stores. Otherwise selftest can fail. + */ +static volatile __u64 glvar; -static bool dawr_present(void) -{ - return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR); -} +#define DAWR_MAX_LEN 512 +static volatile __u8 big_var[DAWR_MAX_LEN] __attribute__((aligned(512))); -static void set_breakpoint_addr(void *addr) -{ - int ret; +#define A_LEN 6 +#define B_LEN 6 +struct gstruct { + __u8 a[A_LEN]; /* double word aligned */ + __u8 b[B_LEN]; /* double word unaligned */ +}; +static volatile struct gstruct gstruct __attribute__((aligned(512))); - ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr); - if (ret) { - perror("Can't set breakpoint addr\n"); - exit(-1); - } -} -static int set_hwbreakpoint_addr(void *addr, int range) +static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) { - int ret; - - struct ppc_hw_breakpoint info; - - info.version = 1; - info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW; - info.addr_mode = PPC_BREAKPOINT_MODE_EXACT; - if (range > 0) - info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; - info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE; - info.addr = (__u64)addr; - info.addr2 = (__u64)addr + range; - info.condition_value = 0; - - ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); - if (ret < 0) { - perror("Can't set breakpoint\n"); + if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) { + perror("Can't get breakpoint info"); exit(-1); } - return ret; } -static int del_hwbreakpoint_addr(int watchpoint_handle) +static bool dawr_present(struct ppc_debug_info *dbginfo) { - int ret; - - ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle); - if (ret < 0) { - perror("Can't delete hw breakpoint\n"); - exit(-1); - } - return ret; + return !!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_DAWR); } -#define DAWR_LENGTH_MAX 512 - -/* Dummy variables to test read/write accesses */ -static unsigned long long - dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)] - __attribute__((aligned(512))); -static unsigned long long *dummy_var = dummy_array; - static void write_var(int len) { - long long *plval; - char *pcval; - short *psval; - int *pival; + __u8 *pcvar; + __u16 *psvar; + __u32 *pivar; + __u64 *plvar; switch (len) { case 1: - pcval = (char *)dummy_var; - *pcval = 0xff; + pcvar = (__u8 *)&glvar; + *pcvar = 0xff; break; case 2: - psval = (short *)dummy_var; - *psval = 0xffff; + psvar = (__u16 *)&glvar; + *psvar = 0xffff; break; case 4: - pival = (int *)dummy_var; - *pival = 0xffffffff; + pivar = (__u32 *)&glvar; + *pivar = 0xffffffff; break; case 8: - plval = (long long *)dummy_var; - *plval = 0xffffffffffffffffLL; + plvar = (__u64 *)&glvar; + *plvar = 0xffffffffffffffffLL; break; } } static void read_var(int len) { - char cval __attribute__((unused)); - short sval __attribute__((unused)); - int ival __attribute__((unused)); - long long lval __attribute__((unused)); + __u8 cvar __attribute__((unused)); + __u16 svar __attribute__((unused)); + __u32 ivar __attribute__((unused)); + __u64 lvar __attribute__((unused)); switch (len) { case 1: - cval = *(char *)dummy_var; + cvar = (__u8)glvar; break; case 2: - sval = *(short *)dummy_var; + svar = (__u16)glvar; break; case 4: - ival = *(int *)dummy_var; + ivar = (__u32)glvar; break; case 8: - lval = *(long long *)dummy_var; + lvar = (__u64)glvar; break; } } -/* - * Do the r/w accesses to trigger the breakpoints. And run - * the usual traps. 
- */ -static void trigger_tests(void) +static void test_workload(void) { - int len, ret; + __u8 cvar __attribute__((unused)); + __u32 ivar __attribute__((unused)); + int len = 0; - ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); - if (ret) { - perror("Can't be traced?\n"); - return; + if (ptrace(PTRACE_TRACEME, 0, NULL, 0)) { + perror("Child can't be traced?"); + exit(-1); } /* Wake up father so that it sets up the first test */ kill(getpid(), SIGUSR1); - /* Test write watchpoints */ - for (len = 1; len <= sizeof(long); len <<= 1) + /* PTRACE_SET_DEBUGREG, WO test */ + for (len = 1; len <= sizeof(glvar); len <<= 1) write_var(len); - /* Test read/write watchpoints (on read accesses) */ - for (len = 1; len <= sizeof(long); len <<= 1) + /* PTRACE_SET_DEBUGREG, RO test */ + for (len = 1; len <= sizeof(glvar); len <<= 1) read_var(len); - /* Test when breakpoint is unset */ - - /* Test write watchpoints */ - for (len = 1; len <= sizeof(long); len <<= 1) - write_var(len); + /* PTRACE_SET_DEBUGREG, RW test */ + for (len = 1; len <= sizeof(glvar); len <<= 1) { + if (rand() % 2) + read_var(len); + else + write_var(len); + } - /* Test read/write watchpoints (on read accesses) */ - for (len = 1; len <= sizeof(long); len <<= 1) - read_var(len); + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */ + write_var(1); + + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO test */ + read_var(1); + + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW test */ + if (rand() % 2) + write_var(1); + else + read_var(1); + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */ + gstruct.a[rand() % A_LEN] = 'a'; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO test */ + cvar = gstruct.a[rand() % A_LEN]; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RW test */ + if (rand() % 2) + gstruct.a[rand() % A_LEN] = 'a'; + else + cvar = gstruct.a[rand() % A_LEN]; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, WO test */ + gstruct.b[rand() % B_LEN] = 'b'; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RO test */ + cvar = gstruct.b[rand() % B_LEN]; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RW test */ + if (rand() % 2) + gstruct.b[rand() % B_LEN] = 'b'; + else + cvar = gstruct.b[rand() % B_LEN]; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE, RW test */ + if (rand() % 2) + *((int *)(gstruct.a + 4)) = 10; + else + ivar = *((int *)(gstruct.a + 4)); + + /* PPC_PTRACE_SETHWDEBUG. DAWR_MAX_LEN. RW test */ + if (rand() % 2) + big_var[rand() % DAWR_MAX_LEN] = 'a'; + else + cvar = big_var[rand() % DAWR_MAX_LEN]; } -static void check_success(const char *msg) +static void check_success(pid_t child_pid, const char *name, const char *type, + unsigned long saddr, int len) { - const char *msg2; int status; + siginfo_t siginfo; + unsigned long eaddr = (saddr + len - 1) | 0x7; + + saddr &= ~0x7; /* Wait for the child to SIGTRAP */ wait(&status); - msg2 = "Failed"; + ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &siginfo); - if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { - msg2 = "Child process hit the breakpoint"; + if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGTRAP || + (unsigned long)siginfo.si_addr < saddr || + (unsigned long)siginfo.si_addr > eaddr) { + printf("%s, %s, len: %d: Fail\n", name, type, len); + exit(-1); } - printf("%s Result: [%s]\n", msg, msg2); + printf("%s, %s, len: %d: Ok\n", name, type, len); + + /* + * For ptrace registered watchpoint, signal is generated + * before executing load/store. Singlestep the instruction + * and then continue the test. 
+ */ + ptrace(PTRACE_SINGLESTEP, child_pid, NULL, 0); + wait(NULL); } -static void launch_watchpoints(char *buf, int mode, int len, - struct ppc_debug_info *dbginfo, bool dawr) +static void ptrace_set_debugreg(pid_t child_pid, unsigned long wp_addr) { - const char *mode_str; - unsigned long data = (unsigned long)(dummy_var); - int wh, range; - - data &= ~0x7UL; - - if (mode == BP_W) { - data |= (1UL << 1); - mode_str = "write"; - } else { - data |= (1UL << 0); - data |= (1UL << 1); - mode_str = "read"; + if (ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, wp_addr)) { + perror("PTRACE_SET_DEBUGREG failed"); + exit(-1); } +} - /* Set DABR_TRANSLATION bit */ - data |= (1UL << 2); - - /* use PTRACE_SET_DEBUGREG breakpoints */ - set_breakpoint_addr((void *)data); - ptrace(PTRACE_CONT, child_pid, NULL, 0); - sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); - check_success(buf); - /* Unregister hw brkpoint */ - set_breakpoint_addr(NULL); +static int ptrace_sethwdebug(pid_t child_pid, struct ppc_hw_breakpoint *info) +{ + int wh = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, info); - data = (data & ~7); /* remove dabr control bits */ + if (wh <= 0) { + perror("PPC_PTRACE_SETHWDEBUG failed"); + exit(-1); + } + return wh; +} - /* use PPC_PTRACE_SETHWDEBUG breakpoint */ - if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE)) - return; /* not supported */ - wh = set_hwbreakpoint_addr((void *)data, 0); - ptrace(PTRACE_CONT, child_pid, NULL, 0); - sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); - check_success(buf); - /* Unregister hw brkpoint */ - del_hwbreakpoint_addr(wh); - - /* try a wider range */ - range = 8; - if (dawr) - range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1)); - wh = set_hwbreakpoint_addr((void *)data, range); - ptrace(PTRACE_CONT, child_pid, NULL, 0); - sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); - check_success(buf); - /* Unregister hw brkpoint */ - del_hwbreakpoint_addr(wh); +static void ptrace_delhwdebug(pid_t child_pid, int wh) +{ + if (ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, wh) < 0) { + perror("PPC_PTRACE_DELHWDEBUG failed"); + exit(-1); + } } -/* Set the breakpoints and check the child successfully trigger them */ -static int launch_tests(bool dawr) +#define DABR_READ_SHIFT 0 +#define DABR_WRITE_SHIFT 1 +#define DABR_TRANSLATION_SHIFT 2 + +static int test_set_debugreg(pid_t child_pid) { - char buf[1024]; - int len, i, status; + unsigned long wp_addr = (unsigned long)&glvar; + char *name = "PTRACE_SET_DEBUGREG"; + int len; + + /* PTRACE_SET_DEBUGREG, WO test*/ + wp_addr &= ~0x7UL; + wp_addr |= (1UL << DABR_WRITE_SHIFT); + wp_addr |= (1UL << DABR_TRANSLATION_SHIFT); + for (len = 1; len <= sizeof(glvar); len <<= 1) { + ptrace_set_debugreg(child_pid, wp_addr); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "WO", wp_addr, len); + } - struct ppc_debug_info dbginfo; + /* PTRACE_SET_DEBUGREG, RO test */ + wp_addr &= ~0x7UL; + wp_addr |= (1UL << DABR_READ_SHIFT); + wp_addr |= (1UL << DABR_TRANSLATION_SHIFT); + for (len = 1; len <= sizeof(glvar); len <<= 1) { + ptrace_set_debugreg(child_pid, wp_addr); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RO", wp_addr, len); + } - i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo); - if (i) { - perror("Can't set breakpoint info\n"); - exit(-1); + /* PTRACE_SET_DEBUGREG, RW test */ + wp_addr &= ~0x7UL; + wp_addr |= (1Ul << DABR_READ_SHIFT); + wp_addr |= (1UL << DABR_WRITE_SHIFT); + wp_addr |= (1UL << 
DABR_TRANSLATION_SHIFT); + for (len = 1; len <= sizeof(glvar); len <<= 1) { + ptrace_set_debugreg(child_pid, wp_addr); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RW", wp_addr, len); } - if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE)) - printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n"); - /* Write watchpoint */ - for (len = 1; len <= sizeof(long); len <<= 1) - launch_watchpoints(buf, BP_W, len, &dbginfo, dawr); + ptrace_set_debugreg(child_pid, 0); + return 0; +} - /* Read-Write watchpoint */ - for (len = 1; len <= sizeof(long); len <<= 1) - launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr); +static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, + unsigned long addr, int len) +{ + info->version = 1; + info->trigger_type = type; + info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info->addr = (__u64)addr; + info->addr2 = (__u64)addr + len; + info->condition_value = 0; + if (!len) + info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; + else + info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; +} +static void test_sethwdebug_exact(pid_t child_pid) +{ + struct ppc_hw_breakpoint info; + unsigned long wp_addr = (unsigned long)&glvar; + char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT"; + int len = 1; /* hardcoded in kernel */ + int wh; + + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */ + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0); + wh = ptrace_sethwdebug(child_pid, &info); ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "WO", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); - /* - * Now we have unregistered the breakpoint, access by child - * should not cause SIGTRAP. - */ + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO test */ + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, 0); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RO", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); - wait(&status); + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW test */ + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, 0); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RW", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); +} - if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { - printf("FAIL: Child process hit the breakpoint, which is not expected\n"); - ptrace(PTRACE_CONT, child_pid, NULL, 0); - return TEST_FAIL; - } +static void test_sethwdebug_range_aligned(pid_t child_pid) +{ + struct ppc_hw_breakpoint info; + unsigned long wp_addr; + char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED"; + int len; + int wh; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */ + wp_addr = (unsigned long)&gstruct.a; + len = A_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "WO", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO test */ + wp_addr = (unsigned long)&gstruct.a; + len = A_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RO", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW 
ALIGNED, RW test */ + wp_addr = (unsigned long)&gstruct.a; + len = A_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RW", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); +} - if (WIFEXITED(status)) - printf("Child exited normally\n"); +static void test_sethwdebug_range_unaligned(pid_t child_pid) +{ + struct ppc_hw_breakpoint info; + unsigned long wp_addr; + char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED"; + int len; + int wh; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, WO test */ + wp_addr = (unsigned long)&gstruct.b; + len = B_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "WO", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RO test */ + wp_addr = (unsigned long)&gstruct.b; + len = B_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RO", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RW test */ + wp_addr = (unsigned long)&gstruct.b; + len = B_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RW", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); - return TEST_PASS; +} + +static void test_sethwdebug_range_unaligned_dar(pid_t child_pid) +{ + struct ppc_hw_breakpoint info; + unsigned long wp_addr; + char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE"; + int len; + int wh; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE, RW test */ + wp_addr = (unsigned long)&gstruct.b; + len = B_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RW", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); +} + +static void test_sethwdebug_dawr_max_range(pid_t child_pid) +{ + struct ppc_hw_breakpoint info; + unsigned long wp_addr; + char *name = "PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN"; + int len; + int wh; + + /* PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN, RW test */ + wp_addr = (unsigned long)big_var; + len = DAWR_MAX_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RW", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); +} + +/* Set the breakpoints and check the child successfully trigger them */ +static void +run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr) +{ + test_set_debugreg(child_pid); + if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) { + test_sethwdebug_exact(child_pid); + test_sethwdebug_range_aligned(child_pid); + if (dawr) { + test_sethwdebug_range_unaligned(child_pid); + test_sethwdebug_range_unaligned_dar(child_pid); + test_sethwdebug_dawr_max_range(child_pid); + } + } } static int ptrace_hwbreak(void) { - pid_t pid; - int ret; + pid_t child_pid; + struct ppc_debug_info dbginfo; bool dawr; - pid = 
fork(); - if (!pid) { - trigger_tests(); + child_pid = fork(); + if (!child_pid) { + test_workload(); return 0; } wait(NULL); - child_pid = pid; - - get_dbginfo(); - SKIP_IF(!hwbreak_present()); - dawr = dawr_present(); + get_dbginfo(child_pid, &dbginfo); + SKIP_IF(dbginfo.num_data_bps == 0); - ret = launch_tests(dawr); + dawr = dawr_present(&dbginfo); + run_tests(child_pid, &dbginfo, dawr); + /* Let the child exit first. */ + ptrace(PTRACE_CONT, child_pid, NULL, 0); wait(NULL); - return ret; + /* + * Testcases exits immediately with -1 on any failure. If + * it has reached here, it means all tests were successful. + */ + return TEST_PASS; } int main(int argc, char **argv, char **envp) From 949758a2f4e606a2575b3cfd0ea22c1d4784c673 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 17 Oct 2019 15:02:03 +0530 Subject: [PATCH 081/144] powerpc/watchpoint: Add DAR outside test in perf-hwbreak.c selftest So far we used to ignore exception if DAR points outside of user specified range. But now we are ignoring it only if actual load/store range does not overlap with user specified range. Include selftests for the same: # ./tools/testing/selftests/powerpc/ptrace/perf-hwbreak ... TESTED: No overlap TESTED: Partial overlap TESTED: Partial overlap TESTED: No overlap TESTED: Full overlap success: perf_hwbreak Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191017093204.7511-7-ravi.bangoria@linux.ibm.com --- .../selftests/powerpc/ptrace/perf-hwbreak.c | 119 +++++++++++++++++- 1 file changed, 118 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index 200337daec421e..c1f324afdbf373 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -148,6 +148,121 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) return 0; } +static int runtest_dar_outside(void) +{ + void *target; + volatile __u16 temp16; + volatile __u64 temp64; + struct perf_event_attr attr; + int break_fd; + unsigned long long breaks; + int fail = 0; + size_t res; + + target = malloc(8); + if (!target) { + perror("malloc failed"); + exit(EXIT_FAILURE); + } + + /* setup counters */ + memset(&attr, 0, sizeof(attr)); + attr.disabled = 1; + attr.type = PERF_TYPE_BREAKPOINT; + attr.exclude_kernel = 1; + attr.exclude_hv = 1; + attr.exclude_guest = 1; + attr.bp_type = HW_BREAKPOINT_RW; + /* watch middle half of target array */ + attr.bp_addr = (__u64)(target + 2); + attr.bp_len = 4; + break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (break_fd < 0) { + free(target); + perror("sys_perf_event_open"); + exit(EXIT_FAILURE); + } + + /* Shouldn't hit. 
*/ + ioctl(break_fd, PERF_EVENT_IOC_RESET); + ioctl(break_fd, PERF_EVENT_IOC_ENABLE); + temp16 = *((__u16 *)target); + *((__u16 *)target) = temp16; + ioctl(break_fd, PERF_EVENT_IOC_DISABLE); + res = read(break_fd, &breaks, sizeof(unsigned long long)); + assert(res == sizeof(unsigned long long)); + if (breaks == 0) { + printf("TESTED: No overlap\n"); + } else { + printf("FAILED: No overlap: %lld != 0\n", breaks); + fail = 1; + } + + /* Hit */ + ioctl(break_fd, PERF_EVENT_IOC_RESET); + ioctl(break_fd, PERF_EVENT_IOC_ENABLE); + temp16 = *((__u16 *)(target + 1)); + *((__u16 *)(target + 1)) = temp16; + ioctl(break_fd, PERF_EVENT_IOC_DISABLE); + res = read(break_fd, &breaks, sizeof(unsigned long long)); + assert(res == sizeof(unsigned long long)); + if (breaks == 2) { + printf("TESTED: Partial overlap\n"); + } else { + printf("FAILED: Partial overlap: %lld != 2\n", breaks); + fail = 1; + } + + /* Hit */ + ioctl(break_fd, PERF_EVENT_IOC_RESET); + ioctl(break_fd, PERF_EVENT_IOC_ENABLE); + temp16 = *((__u16 *)(target + 5)); + *((__u16 *)(target + 5)) = temp16; + ioctl(break_fd, PERF_EVENT_IOC_DISABLE); + res = read(break_fd, &breaks, sizeof(unsigned long long)); + assert(res == sizeof(unsigned long long)); + if (breaks == 2) { + printf("TESTED: Partial overlap\n"); + } else { + printf("FAILED: Partial overlap: %lld != 2\n", breaks); + fail = 1; + } + + /* Shouldn't Hit */ + ioctl(break_fd, PERF_EVENT_IOC_RESET); + ioctl(break_fd, PERF_EVENT_IOC_ENABLE); + temp16 = *((__u16 *)(target + 6)); + *((__u16 *)(target + 6)) = temp16; + ioctl(break_fd, PERF_EVENT_IOC_DISABLE); + res = read(break_fd, &breaks, sizeof(unsigned long long)); + assert(res == sizeof(unsigned long long)); + if (breaks == 0) { + printf("TESTED: No overlap\n"); + } else { + printf("FAILED: No overlap: %lld != 0\n", breaks); + fail = 1; + } + + /* Hit */ + ioctl(break_fd, PERF_EVENT_IOC_RESET); + ioctl(break_fd, PERF_EVENT_IOC_ENABLE); + temp64 = *((__u64 *)target); + *((__u64 *)target) = temp64; + ioctl(break_fd, PERF_EVENT_IOC_DISABLE); + res = read(break_fd, &breaks, sizeof(unsigned long long)); + assert(res == sizeof(unsigned long long)); + if (breaks == 2) { + printf("TESTED: Full overlap\n"); + } else { + printf("FAILED: Full overlap: %lld != 2\n", breaks); + fail = 1; + } + + free(target); + close(break_fd); + return fail; +} + static int runtest(void) { int rwflag; @@ -172,7 +287,9 @@ static int runtest(void) return ret; } } - return 0; + + ret = runtest_dar_outside(); + return ret; } From 5dc7b419a5a746db6941154b06b932eaf2e692df Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 17 Oct 2019 15:02:04 +0530 Subject: [PATCH 082/144] powerpc/watchpoint: Support for 8xx in ptrace-hwbreak.c selftest On the 8xx, signals are generated after executing the instruction. So no need to manually single-step on 8xx. Also, 8xx __set_dabr() currently ignores length and hardcodes the length to 8 bytes. So all unaligned and 512 byte testcase will fail on 8xx. Ignore those testcases on 8xx. 
Signed-off-by: Ravi Bangoria
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20191017093204.7511-8-ravi.bangoria@linux.ibm.com
---
 .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 32 +++++++++++++------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index 916e97f5f8b1aa..7deedbc16b0b72 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -22,6 +22,11 @@
 #include
 #include "ptrace.h"

+#define SPRN_PVR 0x11F
+#define PVR_8xx 0x00500000
+
+bool is_8xx;
+
 /*
  * Use volatile on all global var so that compiler doesn't
  * optimise their load/stores. Otherwise selftest can fail.
@@ -205,13 +210,15 @@ static void check_success(pid_t child_pid, const char *name, const char *type,

 	printf("%s, %s, len: %d: Ok\n", name, type, len);

-	/*
-	 * For ptrace registered watchpoint, signal is generated
-	 * before executing load/store. Singlestep the instruction
-	 * and then continue the test.
-	 */
-	ptrace(PTRACE_SINGLESTEP, child_pid, NULL, 0);
-	wait(NULL);
+	if (!is_8xx) {
+		/*
+		 * For ptrace registered watchpoint, signal is generated
+		 * before executing load/store. Singlestep the instruction
+		 * and then continue the test.
+		 */
+		ptrace(PTRACE_SINGLESTEP, child_pid, NULL, 0);
+		wait(NULL);
+	}
 }

 static void ptrace_set_debugreg(pid_t child_pid, unsigned long wp_addr)
@@ -447,8 +454,10 @@ run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr)
 	test_set_debugreg(child_pid);
 	if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) {
 		test_sethwdebug_exact(child_pid);
-		test_sethwdebug_range_aligned(child_pid);
-		if (dawr) {
+
+		if (!is_8xx)
+			test_sethwdebug_range_aligned(child_pid);
+		if (dawr && !is_8xx) {
 			test_sethwdebug_range_unaligned(child_pid);
 			test_sethwdebug_range_unaligned_dar(child_pid);
 			test_sethwdebug_dawr_max_range(child_pid);
@@ -489,5 +498,10 @@ static int ptrace_hwbreak(void)

 int main(int argc, char **argv, char **envp)
 {
+	int pvr = 0;
+	asm __volatile__ ("mfspr %0,%1" : "=r"(pvr) : "i"(SPRN_PVR));
+	if (pvr == PVR_8xx)
+		is_8xx = true;
+
 	return test_harness(ptrace_hwbreak, "ptrace-hwbreak");
 }

From d273fa919c39223a1edd968e82ea88501b63d21a Mon Sep 17 00:00:00 2001
From: YueHaibing
Date: Wed, 23 Oct 2019 21:48:38 +0800
Subject: [PATCH 083/144] powerpc/pseries: Use correct event modifier in rtas_parse_epow_errlog()

rtas_parse_epow_errlog() should pass 'modifier' to handle_system_shutdown(), because the event modifier only uses the bottom 4 bits.
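For reference, the intended flow looks like this (a minimal sketch; the 0xF mask is implied by "bottom 4 bits", and the surrounding switch is abbreviated):

	int modifier = epow_log->event_modifier & 0xF;	/* bottom 4 bits only */
	...
	case EPOW_SYSTEM_SHUTDOWN:
		handle_system_shutdown(modifier);	/* masked value, not the raw field */
		break;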
Reviewed-by: Tyrel Datwyler Signed-off-by: YueHaibing Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191023134838.21280-1-yuehaibing@huawei.com --- arch/powerpc/platforms/pseries/ras.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 3acdcc3bb908c6..1d7f973c647b37 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -255,7 +255,7 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log) break; case EPOW_SYSTEM_SHUTDOWN: - handle_system_shutdown(epow_log->event_modifier); + handle_system_shutdown(modifier); break; case EPOW_SYSTEM_HALT: From 090d5ab93d0b8a079dd516c16649bd00ba4f7302 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 29 Nov 2018 13:35:18 +0000 Subject: [PATCH 084/144] powerpc/64s: Fix debugfs_simple_attr.cocci warnings Use DEFINE_DEBUGFS_ATTRIBUTE rather than DEFINE_SIMPLE_ATTRIBUTE for debugfs files. Semantic patch information: Rationale: DEFINE_SIMPLE_ATTRIBUTE + debugfs_create_file() imposes some significant overhead as compared to DEFINE_DEBUGFS_ATTRIBUTE + debugfs_create_file_unsafe(). Generated by: scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci Signed-off-by: YueHaibing Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1543498518-107601-1-git-send-email-yuehaibing@huawei.com --- arch/powerpc/kernel/security.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index faff8c2a0e2fcd..7d4b2080a65850 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -94,13 +94,14 @@ static int barrier_nospec_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_barrier_nospec, - barrier_nospec_get, barrier_nospec_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_barrier_nospec, barrier_nospec_get, + barrier_nospec_set, "%llu\n"); static __init int barrier_nospec_debugfs_init(void) { - debugfs_create_file("barrier_nospec", 0600, powerpc_debugfs_root, NULL, - &fops_barrier_nospec); + debugfs_create_file_unsafe("barrier_nospec", 0600, + powerpc_debugfs_root, NULL, + &fops_barrier_nospec); return 0; } device_initcall(barrier_nospec_debugfs_init); @@ -368,11 +369,13 @@ static int stf_barrier_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, + "%llu\n"); static __init int stf_barrier_debugfs_init(void) { - debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier); + debugfs_create_file_unsafe("stf_barrier", 0600, powerpc_debugfs_root, + NULL, &fops_stf_barrier); return 0; } device_initcall(stf_barrier_debugfs_init); @@ -443,13 +446,14 @@ static int count_cache_flush_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get, - count_cache_flush_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get, + count_cache_flush_set, "%llu\n"); static __init int count_cache_flush_debugfs_init(void) { - debugfs_create_file("count_cache_flush", 0600, powerpc_debugfs_root, - NULL, &fops_count_cache_flush); + debugfs_create_file_unsafe("count_cache_flush", 0600, + powerpc_debugfs_root, NULL, + &fops_count_cache_flush); return 0; } device_initcall(count_cache_flush_debugfs_init); From 
bfa2325e5b8b800cc6720cad8d2f066cd0136bee Mon Sep 17 00:00:00 2001
From: YueHaibing
Date: Tue, 25 Dec 2018 02:44:36 +0000
Subject: [PATCH 085/144] powerpc/powernv/npu: Fix debugfs_simple_attr.cocci warnings

Use DEFINE_DEBUGFS_ATTRIBUTE rather than DEFINE_SIMPLE_ATTRIBUTE for debugfs files.

Semantic patch information:
Rationale: DEFINE_SIMPLE_ATTRIBUTE + debugfs_create_file() imposes some significant overhead as compared to DEFINE_DEBUGFS_ATTRIBUTE + debugfs_create_file_unsafe().

Generated by: scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci

Signed-off-by: YueHaibing
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/1545705876-63132-1-git-send-email-yuehaibing@huawei.com
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index c28d0d9b7ee0fc..da1068a9c2637a 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3086,8 +3086,8 @@ static int pnv_pci_diag_data_set(void *data, u64 val)
 	return 0;
 }

-DEFINE_SIMPLE_ATTRIBUTE(pnv_pci_diag_data_fops, NULL,
-			pnv_pci_diag_data_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_diag_data_fops, NULL, pnv_pci_diag_data_set,
+			 "%llu\n");

 #endif /* CONFIG_DEBUG_FS */

@@ -3112,8 +3112,8 @@ static void pnv_pci_ioda_create_dbgfs(void)
 			continue;
 		}

-		debugfs_create_file("dump_diag_regs", 0200, phb->dbgfs, hose,
-				    &pnv_pci_diag_data_fops);
+		debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs,
+					   hose, &pnv_pci_diag_data_fops);
 	}
 #endif /* CONFIG_DEBUG_FS */
 }

From 11dd34f3eae5a468013bb161a1dcf1fecd2ca321 Mon Sep 17 00:00:00 2001
From: YueHaibing
Date: Mon, 18 Feb 2019 12:56:44 +0000
Subject: [PATCH 086/144] powerpc/pseries: Drop pointless static qualifier in vpa_debugfs_init()

There is no need to have the 'struct dentry *vpa_dir' variable static, since a new value is always assigned to it before it is used.
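For illustration, the pattern being cleaned up is (a sketch; the debugfs directory name is assumed here, see the diff below for the actual one-line change):

	struct dentry *vpa_dir;	/* automatic storage is enough... */

	vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);	/* ...it is assigned on every call */

A static local would only preserve a stale pointer between calls to an __init function that runs once anyway.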
Fixes: c6c26fb55e8e ("powerpc/pseries: Export raw per-CPU VPA data via debugfs")
Signed-off-by: YueHaibing
Reviewed-by: Daniel Axtens
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20190218125644.87448-1-yuehaibing@huawei.com
---
 arch/powerpc/platforms/pseries/lpar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 74c59a1e96278f..8c5af2cd95a73e 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -2000,7 +2000,7 @@ static int __init vpa_debugfs_init(void)
 {
 	char name[16];
 	long i;
-	static struct dentry *vpa_dir;
+	struct dentry *vpa_dir;

 	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
 		return 0;

From 42974f357dbf05d649ff62719de21995e7cfee79 Mon Sep 17 00:00:00 2001
From: YueHaibing
Date: Mon, 18 Feb 2019 13:39:50 +0000
Subject: [PATCH 087/144] powerpc/pseries: Fix platform_no_drv_owner.cocci warnings

Remove the .owner field, since the driver registration calls used here set it automatically.

Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci

Signed-off-by: YueHaibing
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20190218133950.95225-1-yuehaibing@huawei.com
---
 arch/powerpc/platforms/pseries/papr_scm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index ee07d0718bf1a0..f87b474d25a77f 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -513,7 +513,6 @@ static struct platform_driver papr_scm_driver = {
 	.remove = papr_scm_remove,
 	.driver = {
 		.name = "papr_scm",
-		.owner = THIS_MODULE,
 		.of_match_table = papr_scm_match,
 	},
 };

From 93a1544ad4ec4bd9147992e57b4f834ceb2cc159 Mon Sep 17 00:00:00 2001
From: YueHaibing
Date: Tue, 12 Nov 2019 23:01:34 +1100
Subject: [PATCH 088/144] powerpc/configs: remove obsolete CONFIG_INET_XFRM_MODE_* and CONFIG_INET6_XFRM_MODE_*

These Kconfig options have been removed in commit 4c145dce2601 ("xfrm: make xfrm modes builtin"), so there is no point in keeping them in the defconfigs any longer.
Signed-off-by: YueHaibing [mpe: Extract from cross arch patch] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190612071901.21736-1-yuehaibing@huawei.com --- arch/powerpc/configs/40x/acadia_defconfig | 3 --- arch/powerpc/configs/40x/ep405_defconfig | 3 --- arch/powerpc/configs/40x/kilauea_defconfig | 3 --- arch/powerpc/configs/40x/makalu_defconfig | 3 --- arch/powerpc/configs/40x/obs600_defconfig | 3 --- arch/powerpc/configs/40x/walnut_defconfig | 3 --- arch/powerpc/configs/44x/akebono_defconfig | 3 --- arch/powerpc/configs/44x/arches_defconfig | 3 --- arch/powerpc/configs/44x/bamboo_defconfig | 3 --- arch/powerpc/configs/44x/canyonlands_defconfig | 3 --- arch/powerpc/configs/44x/currituck_defconfig | 3 --- arch/powerpc/configs/44x/ebony_defconfig | 3 --- arch/powerpc/configs/44x/eiger_defconfig | 3 --- arch/powerpc/configs/44x/fsp2_defconfig | 3 --- arch/powerpc/configs/44x/icon_defconfig | 3 --- arch/powerpc/configs/44x/iss476-smp_defconfig | 3 --- arch/powerpc/configs/44x/katmai_defconfig | 3 --- arch/powerpc/configs/44x/rainier_defconfig | 3 --- arch/powerpc/configs/44x/redwood_defconfig | 3 --- arch/powerpc/configs/44x/sam440ep_defconfig | 3 --- arch/powerpc/configs/44x/sequoia_defconfig | 3 --- arch/powerpc/configs/44x/taishan_defconfig | 3 --- arch/powerpc/configs/52xx/pcm030_defconfig | 3 --- arch/powerpc/configs/83xx/kmeter1_defconfig | 3 --- arch/powerpc/configs/83xx/mpc837x_rdb_defconfig | 3 --- arch/powerpc/configs/85xx/ge_imp3a_defconfig | 1 - arch/powerpc/configs/adder875_defconfig | 3 --- arch/powerpc/configs/amigaone_defconfig | 3 --- arch/powerpc/configs/cell_defconfig | 2 -- arch/powerpc/configs/chrp32_defconfig | 3 --- arch/powerpc/configs/ep88xc_defconfig | 3 --- arch/powerpc/configs/gamecube_defconfig | 3 --- arch/powerpc/configs/mpc512x_defconfig | 3 --- arch/powerpc/configs/mpc885_ads_defconfig | 3 --- arch/powerpc/configs/pmac32_defconfig | 2 -- arch/powerpc/configs/powernv_defconfig | 3 --- arch/powerpc/configs/ppc44x_defconfig | 3 --- arch/powerpc/configs/ppc6xx_defconfig | 4 ---- arch/powerpc/configs/ps3_defconfig | 3 --- arch/powerpc/configs/skiroot_defconfig | 3 --- arch/powerpc/configs/storcenter_defconfig | 3 --- arch/powerpc/configs/tqm8xx_defconfig | 3 --- arch/powerpc/configs/wii_defconfig | 3 --- 43 files changed, 126 deletions(-) diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig index 5a75e4f1427307..db93c117be36e8 100644 --- a/arch/powerpc/configs/40x/acadia_defconfig +++ b/arch/powerpc/configs/40x/acadia_defconfig @@ -18,9 +18,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig index e2691c5db76604..a3854cf65f8d0a 100644 --- a/arch/powerpc/configs/40x/ep405_defconfig +++ b/arch/powerpc/configs/40x/ep405_defconfig @@ -17,9 +17,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig index 949989ef23228e..edc22464dfb57f 100644 --- a/arch/powerpc/configs/40x/kilauea_defconfig +++ 
b/arch/powerpc/configs/40x/kilauea_defconfig @@ -20,9 +20,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig index 90b759bbf426e7..188789b9aa4c0f 100644 --- a/arch/powerpc/configs/40x/makalu_defconfig +++ b/arch/powerpc/configs/40x/makalu_defconfig @@ -17,9 +17,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig index 881c300c011d24..5bf6af7ef09342 100644 --- a/arch/powerpc/configs/40x/obs600_defconfig +++ b/arch/powerpc/configs/40x/obs600_defconfig @@ -20,9 +20,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/40x/walnut_defconfig b/arch/powerpc/configs/40x/walnut_defconfig index 0ed46704b9faca..9eaaf1a1d2c68a 100644 --- a/arch/powerpc/configs/40x/walnut_defconfig +++ b/arch/powerpc/configs/40x/walnut_defconfig @@ -15,9 +15,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index 2fa553ebfdc945..f0c8a07cc27463 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -29,9 +29,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig index 5a1b9ee1807531..82c6f49b8dcb14 100644 --- a/arch/powerpc/configs/44x/arches_defconfig +++ b/arch/powerpc/configs/44x/arches_defconfig @@ -20,9 +20,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig index 22e1ef5272ab18..679213214a7541 100644 --- a/arch/powerpc/configs/44x/bamboo_defconfig +++ b/arch/powerpc/configs/44x/bamboo_defconfig @@ -18,9 +18,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig 
b/arch/powerpc/configs/44x/canyonlands_defconfig index 86f34ea4173a71..ccc14eb7a2f129 100644 --- a/arch/powerpc/configs/44x/canyonlands_defconfig +++ b/arch/powerpc/configs/44x/canyonlands_defconfig @@ -20,9 +20,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig index ce3ec5a2cd1571..be76e066df016a 100644 --- a/arch/powerpc/configs/44x/currituck_defconfig +++ b/arch/powerpc/configs/44x/currituck_defconfig @@ -27,9 +27,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y diff --git a/arch/powerpc/configs/44x/ebony_defconfig b/arch/powerpc/configs/44x/ebony_defconfig index f67447c92e6feb..93d2a4e64af929 100644 --- a/arch/powerpc/configs/44x/ebony_defconfig +++ b/arch/powerpc/configs/44x/ebony_defconfig @@ -16,9 +16,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig index 5dbd83a1c11b90..1abaa63e067f0d 100644 --- a/arch/powerpc/configs/44x/eiger_defconfig +++ b/arch/powerpc/configs/44x/eiger_defconfig @@ -21,9 +21,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig index e49114f0e52689..e67fc041ca3e8b 100644 --- a/arch/powerpc/configs/44x/fsp2_defconfig +++ b/arch/powerpc/configs/44x/fsp2_defconfig @@ -39,9 +39,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_VLAN_8021Q=m CONFIG_DEVTMPFS=y diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig index fa5378af44f90a..7d7ff84c82000a 100644 --- a/arch/powerpc/configs/44x/icon_defconfig +++ b/arch/powerpc/configs/44x/icon_defconfig @@ -20,9 +20,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig index aae879c2123955..fb5c73a29bf426 100644 --- a/arch/powerpc/configs/44x/iss476-smp_defconfig +++ b/arch/powerpc/configs/44x/iss476-smp_defconfig @@ -29,9 +29,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # 
CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig index 56eddca998c6e1..c6dc1445fc043f 100644 --- a/arch/powerpc/configs/44x/katmai_defconfig +++ b/arch/powerpc/configs/44x/katmai_defconfig @@ -18,9 +18,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig index 369bfd2e451da8..c83ad03182df78 100644 --- a/arch/powerpc/configs/44x/rainier_defconfig +++ b/arch/powerpc/configs/44x/rainier_defconfig @@ -19,9 +19,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig index 8be95f6fe3a7fb..640fe1d5af2822 100644 --- a/arch/powerpc/configs/44x/redwood_defconfig +++ b/arch/powerpc/configs/44x/redwood_defconfig @@ -21,9 +21,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig index 974a4f038cda6d..ed02f12dbd54e1 100644 --- a/arch/powerpc/configs/44x/sam440ep_defconfig +++ b/arch/powerpc/configs/44x/sam440ep_defconfig @@ -23,9 +23,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig index 10e517b69fa4e3..2c0973db88373d 100644 --- a/arch/powerpc/configs/44x/sequoia_defconfig +++ b/arch/powerpc/configs/44x/sequoia_defconfig @@ -20,9 +20,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig index cd08f3ddd6093e..a2d355ca62b286 100644 --- a/arch/powerpc/configs/44x/taishan_defconfig +++ b/arch/powerpc/configs/44x/taishan_defconfig @@ -18,9 +18,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/52xx/pcm030_defconfig b/arch/powerpc/configs/52xx/pcm030_defconfig index 303600ff1fdbcd..fdb11daeb68805 100644 --- a/arch/powerpc/configs/52xx/pcm030_defconfig +++ b/arch/powerpc/configs/52xx/pcm030_defconfig @@ -31,9 +31,6 @@ CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# 
CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set diff --git a/arch/powerpc/configs/83xx/kmeter1_defconfig b/arch/powerpc/configs/83xx/kmeter1_defconfig index d21b5cb365f23a..648c6b3dccf99d 100644 --- a/arch/powerpc/configs/83xx/kmeter1_defconfig +++ b/arch/powerpc/configs/83xx/kmeter1_defconfig @@ -25,9 +25,6 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_TIPC=y CONFIG_BRIDGE=m diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig index dad53ef86b49f8..cbcae2a927e99e 100644 --- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig @@ -22,9 +22,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_LOOP=y diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig index 920f37316fdbdf..f29c166998af59 100644 --- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig +++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig @@ -60,7 +60,6 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_INET6_AH=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_TUNNEL=m diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index f7a803ab22850f..510f7fd1f6a35e 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -22,9 +22,6 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig index cf94d28d0e3107..f6d140f2d922bb 100644 --- a/arch/powerpc/configs/amigaone_defconfig +++ b/arch/powerpc/configs/amigaone_defconfig @@ -26,9 +26,6 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 2dd1b58a18aeff..42fbc70cec3310 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -51,11 +51,9 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NET_IPIP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m -# CONFIG_INET6_XFRM_MODE_BEET is not set # CONFIG_IPV6_SIT is not set CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig index 9ff493dd8439b4..502a75d49789e6 100644 --- a/arch/powerpc/configs/chrp32_defconfig +++ b/arch/powerpc/configs/chrp32_defconfig @@ 
-27,9 +27,6 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index b20bd0cf35438c..9c1bf60f1e1930 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -24,9 +24,6 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig index 85e73c3bd85987..24c0e0ea5aeb9b 100644 --- a/arch/powerpc/configs/gamecube_defconfig +++ b/arch/powerpc/configs/gamecube_defconfig @@ -29,9 +29,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_RARP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index 6203c1093a3a4f..1f3a045ab08111 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -25,9 +25,6 @@ CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_PNP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_CAN=y diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 285d506c5a7699..0327a329316fe1 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -23,9 +23,6 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 4e6e95f9264607..f492e7d3592549 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -38,8 +38,6 @@ CONFIG_IP_MULTICAST=y CONFIG_SYN_COOKIES=y CONFIG_INET_AH=y CONFIG_INET_ESP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=m diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 6658cceb928c6e..32841456a57304 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -83,9 +83,6 @@ CONFIG_INET_IPCOMP=m CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m CONFIG_IPV6_SIT=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index 67952819593e7e..a41eedfe0a5f32 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ 
b/arch/powerpc/configs/ppc44x_defconfig @@ -32,9 +32,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_BRIDGE=m CONFIG_CONNECTOR=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 9dca4cffa623da..7e28919041cf2d 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -109,9 +109,6 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_HSTCP=m @@ -129,7 +126,6 @@ CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m CONFIG_IPV6_TUNNEL=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 314c63939816dc..4db51719342a28 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -47,9 +47,6 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set CONFIG_BT=m CONFIG_BT_RFCOMM=m diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 1e18454083ff8f..069f67f127319e 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -64,9 +64,6 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_NET_IPIP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_DNS_RESOLVER=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index 6c39c52b8e4a74..29b19ec7e5d74a 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -22,9 +22,6 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index 7493f36dd6e9c0..ffed2b4256d697 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -27,9 +27,6 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set # CONFIG_FW_LOADER is not set diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig index 5a04448ad6b58f..379c171f3ddd03 100644 --- a/arch/powerpc/configs/wii_defconfig +++ b/arch/powerpc/configs/wii_defconfig @@ -29,9 +29,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_RARP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set 
CONFIG_BT=y From bc75e5438488edef80d952d1146701f872092750 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 2 Jul 2019 21:17:33 +0800 Subject: [PATCH 089/144] powerpc/powernv: Make some symbols static Fix sparse warnings: arch/powerpc/platforms/powernv/opal-psr.c:20:1: warning: symbol 'psr_mutex' was not declared. Should it be static? arch/powerpc/platforms/powernv/opal-psr.c:27:3: warning: symbol 'psr_attrs' was not declared. Should it be static? arch/powerpc/platforms/powernv/opal-powercap.c:20:1: warning: symbol 'powercap_mutex' was not declared. Should it be static? arch/powerpc/platforms/powernv/opal-sensor-groups.c:20:1: warning: symbol 'sg_mutex' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190702131733.44100-1-yuehaibing@huawei.com --- arch/powerpc/platforms/powernv/opal-powercap.c | 2 +- arch/powerpc/platforms/powernv/opal-psr.c | 4 ++-- arch/powerpc/platforms/powernv/opal-sensor-groups.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c index dc599e787f78ff..c16d44f6f1d12e 100644 --- a/arch/powerpc/platforms/powernv/opal-powercap.c +++ b/arch/powerpc/platforms/powernv/opal-powercap.c @@ -13,7 +13,7 @@ #include -DEFINE_MUTEX(powercap_mutex); +static DEFINE_MUTEX(powercap_mutex); static struct kobject *powercap_kobj; diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c index b6ccb3026c6c0e..69d7e75950d135 100644 --- a/arch/powerpc/platforms/powernv/opal-psr.c +++ b/arch/powerpc/platforms/powernv/opal-psr.c @@ -13,11 +13,11 @@ #include -DEFINE_MUTEX(psr_mutex); +static DEFINE_MUTEX(psr_mutex); static struct kobject *psr_kobj; -struct psr_attr { +static struct psr_attr { u32 handle; struct kobj_attribute attr; } *psr_attrs; diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c index 31f13c13275f0b..f8ae1fb0c102f7 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c +++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c @@ -13,7 +13,7 @@ #include -DEFINE_MUTEX(sg_mutex); +static DEFINE_MUTEX(sg_mutex); static struct kobject *sg_kobj; From c312d14e19bb7ca8214ef661d9a125cd631528cb Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 11 Jul 2019 22:18:18 +0800 Subject: [PATCH 090/144] powerpc/powernv/ioda: using kfree_rcu() to simplify the code The callback function of call_rcu() just calls a kfree(), so we can use kfree_rcu() instead of call_rcu() + callback function. 
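The conversion follows the generic pattern below (a sketch with placeholder names, not this driver's types): kfree_rcu() takes the object pointer plus the name of its struct rcu_head member, and frees the object after a grace period, exactly like call_rcu() with a callback that only calls kfree():

	struct obj {
		struct rcu_head rcu;
		/* ... payload ... */
	};

	/* Before: a callback whose only job is to free the object. */
	static void obj_free_cb(struct rcu_head *head)
	{
		kfree(container_of(head, struct obj, rcu));
	}
	...
	call_rcu(&p->rcu, obj_free_cb);

	/* After: equivalent behaviour, no callback boilerplate. */
	kfree_rcu(p, rcu);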
Signed-off-by: YueHaibing
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20190711141818.18044-1-yuehaibing@huawei.com
---
 arch/powerpc/platforms/powernv/pci-ioda-tce.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index a0b9c0c23ed278..5dc6847d5f4c84 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -340,14 +340,6 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
 	return -ENOMEM;
 }

-static void pnv_iommu_table_group_link_free(struct rcu_head *head)
-{
-	struct iommu_table_group_link *tgl = container_of(head,
-			struct iommu_table_group_link, rcu);
-
-	kfree(tgl);
-}
-
 void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
 		struct iommu_table_group *table_group)
 {
@@ -363,7 +355,7 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
 	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
 		if (tgl->table_group == table_group) {
 			list_del_rcu(&tgl->next);
-			call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
+			kfree_rcu(tgl, rcu);
 			found = true;
 			break;
 		}

From 35a5c328fcf3493c5adf333d34c1ca6953fe372d Mon Sep 17 00:00:00 2001
From: YueHaibing
Date: Wed, 23 Oct 2019 21:44:23 +0800
Subject: [PATCH 091/144] powerpc/spufs: remove set but not used variable 'ctx'

arch/powerpc/platforms/cell/spufs/inode.c:201:22: warning: variable ctx set but not used [-Wunused-but-set-variable]

It has not been used since commit 67cba9fd6456 ("move spu_forget() into spufs_rmdir()").

Signed-off-by: YueHaibing
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20191023134423.15052-1-yuehaibing@huawei.com
---
 arch/powerpc/platforms/cell/spufs/inode.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 2dd452a047cd6c..9b1586b8515247 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -198,14 +198,12 @@ static int spufs_fill_dir(struct dentry *dir,

 static int spufs_dir_close(struct inode *inode, struct file *file)
 {
-	struct spu_context *ctx;
 	struct inode *parent;
 	struct dentry *dir;
 	int ret;

 	dir = file->f_path.dentry;
 	parent = d_inode(dir->d_parent);
-	ctx = SPUFS_I(d_inode(dir))->i_ctx;

 	inode_lock_nested(parent, I_MUTEX_PARENT);
 	ret = spufs_rmdir(parent, dir);

From d7e02f7b7991dbe14a2acfb0e53d675cd149001c Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V"
Date: Thu, 11 Jul 2019 20:28:14 +0530
Subject: [PATCH 092/144] powerpc/book3s/mm: Update Oops message to print the correct translation in use

This avoids confusion when printing an Oops message like the one below:

  Faulting instruction address: 0xc00000000008bdb4
  Oops: Kernel access of bad area, sig: 11 [#1]
  LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV

This happened because we never clear the MMU_FTR_HPTE_TABLE feature flag, even when we run with radix translation. It was discussed that we should treat this feature flag as an indication of the capability to run hash translation, and we should not clear it even when running with radix translation. All the code paths check radix_enabled() and, if it returns true, consider that we are running with radix translation. Follow the same sequence when choosing the MMU translation string to be used in the Oops message.
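With this change the same configuration reports only the active translation, e.g. (illustrative, derived from the message above):

  LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV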
Signed-off-by: Aneesh Kumar K.V Acked-by: Nicholas Piggin Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190711145814.17970-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/kernel/traps.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 82f43535e68674..014ff0701f2459 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -250,15 +250,22 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, } NOKPROBE_SYMBOL(oops_end); +static char *get_mmu_str(void) +{ + if (early_radix_enabled()) + return " MMU=Radix"; + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) + return " MMU=Hash"; + return ""; +} + static int __die(const char *str, struct pt_regs *regs, long err) { printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); - printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s%s %s\n", + printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", - PAGE_SIZE / 1024, - early_radix_enabled() ? " MMU=Radix" : "", - early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? " MMU=Hash" : "", + PAGE_SIZE / 1024, get_mmu_str(), IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", IS_ENABLED(CONFIG_SMP) ? " SMP" : "", IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", From bbbd7f112c7b0af32f7b3c725b2c41e93cf181f6 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 28 Aug 2019 08:07:37 +0200 Subject: [PATCH 093/144] powerpc: Replace GPL boilerplate with SPDX identifiers The FSF does not reside in "675 Mass Ave, Cambridge" anymore... let's simply use proper SPDX identifiers instead. Signed-off-by: Thomas Huth Acked-by: Russell Currey Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190828060737.32531-1-thuth@redhat.com --- arch/powerpc/include/uapi/asm/spu_info.h | 14 -------------- arch/powerpc/kernel/eeh_driver.c | 18 +----------------- arch/powerpc/kernel/eeh_sysfs.c | 18 +----------------- arch/powerpc/platforms/pseries/pci_dlpar.c | 18 +----------------- 4 files changed, 3 insertions(+), 65 deletions(-) diff --git a/arch/powerpc/include/uapi/asm/spu_info.h b/arch/powerpc/include/uapi/asm/spu_info.h index cabfcbba9eac7e..45f97150587ba5 100644 --- a/arch/powerpc/include/uapi/asm/spu_info.h +++ b/arch/powerpc/include/uapi/asm/spu_info.h @@ -5,20 +5,6 @@ * (C) Copyright 2006 IBM Corp. * * Author: Dwayne Grant McConnell - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _UAPI_SPU_INFO_H diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index d9279d0ee9f54f..516a42666b6b16 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -1,25 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * PCI Error Recovery Driver for RPA-compliant PPC64 platform. * Copyright IBM Corp. 
2004 2005 * Copyright Linas Vepstas 2004, 2005 * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * * Send comments and feedback to Linas Vepstas */ #include diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 3fa04dda17371a..ab44d965a53c7a 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -1,25 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Sysfs entries for PCI Error Recovery for PAPR-compliant platform. * Copyright IBM Corporation 2007 * Copyright Linas Vepstas 2007 * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * * Send comments and feedback to Linas Vepstas */ #include diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 561917fa54a8a0..361986e4354e5e 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * PCI Dynamic LPAR, PCI Hot Plug and PCI EEH recovery code * for RPA-compliant PPC64 platform. @@ -6,23 +7,6 @@ * * Updates, 2005, John Rose * Updates, 2005, Linas Vepstas - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #include From 1db550f44ab672b5ded14a94e092b9a9a2d29c8c Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Tue, 22 Oct 2019 17:06:03 +1100 Subject: [PATCH 094/144] powerpc/64s/exception: Fix kaup -> kuap typo It's KUAP, not KAUP. Fix typo in INT_COMMON macro. Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191022060603.24101-1-ajd@linux.ibm.com --- arch/powerpc/kernel/exceptions-64s.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d0018dd17e0a63..46508b148e1682 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -514,7 +514,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) * If stack=0, then the stack is already set in r1, and r1 is saved in r10. * PPR save and CPU accounting is not done for the !stack case (XXX why not?) */ -.macro INT_COMMON vec, area, stack, kaup, reconcile, dar, dsisr +.macro INT_COMMON vec, area, stack, kuap, reconcile, dar, dsisr .if \stack andi. r10,r12,MSR_PR /* See if coming from user */ mr r10,r1 /* Save r1 */ @@ -533,7 +533,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) std r10,GPR1(r1) /* save r1 in stackframe */ .if \stack - .if \kaup + .if \kuap kuap_save_amr_and_lock r9, r10, cr1, cr0 .endif beq 101f /* if from kernel mode */ @@ -541,7 +541,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) SAVE_PPR(\area, r9) 101: .else - .if \kaup + .if \kuap kuap_save_amr_and_lock r9, r10, cr1 .endif .endif From 1ca3dec2b2dff9d286ce6cd64108bda0e98f9710 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 31 Oct 2019 07:31:00 +0100 Subject: [PATCH 095/144] powerpc/xive: Prevent page fault issues in the machine crash handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the machine crash handler is invoked, all interrupts are masked but interrupts which have not been started yet do not have an ESB page mapped in the Linux address space. This crashes the 'crash kexec' sequence on sPAPR guests. To fix, force the mapping of the ESB page when an interrupt is being mapped in the Linux IRQ number space. This is done by setting the initial state of the interrupt to OFF which is not necessarily the case on PowerNV. Fixes: 243e25112d06 ("powerpc/xive: Native exploitation of the XIVE interrupt controller") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191031063100.3864-1-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index df832b09e3e9ce..f5fadbd2533a8f 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1035,6 +1035,15 @@ static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw) xd->target = XIVE_INVALID_TARGET; irq_set_handler_data(virq, xd); + /* + * Turn OFF by default the interrupt being mapped. A side + * effect of this check is the mapping the ESB page of the + * interrupt in the Linux address space. This prevents page + * fault issues in the crash handler which masks all + * interrupts. 
+ */ + xive_esb_read(xd, XIVE_ESB_SET_PQ_01); + return 0; } From f5817191b0a3257d9963a574c53d085d9f443e7d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 Aug 2019 18:07:52 +0300 Subject: [PATCH 096/144] powerpc: use <asm-generic/dma-mapping.h> The powerpc version of dma-mapping.h only contains a version of get_arch_dma_ops that always returns NULL. Replace it with the asm-generic version that does the same. Signed-off-by: Christoph Hellwig Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190807150752.17894-1-hch@lst.de --- arch/powerpc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/dma-mapping.h | 18 ------------------ 2 files changed, 1 insertion(+), 18 deletions(-) delete mode 100644 arch/powerpc/include/asm/dma-mapping.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 64870c7be4a3f4..8dbf85d5da0e22 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -4,6 +4,7 @@ generated-y += syscall_table_64.h generated-y += syscall_table_c32.h generated-y += syscall_table_spu.h generic-y += div64.h +generic-y += dma-mapping.h generic-y += export.h generic-y += irq_regs.h generic-y += local64.h diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h deleted file mode 100644 index 565d6f74b189cc..00000000000000 --- a/arch/powerpc/include/asm/dma-mapping.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2004 IBM - */ -#ifndef _ASM_DMA_MAPPING_H -#define _ASM_DMA_MAPPING_H - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - /* We don't handle the NULL dev case for ISA for now. We could - * do it via an out of line call but it is not needed for now. The - * only ISA DMA device we support is the floppy and we have a hack - * in the floppy driver directly to get a device for us. - */ - return NULL; -} - -#endif /* _ASM_DMA_MAPPING_H */ From b948aaaf3e39cc475e45fea727638f191a5cb1b4 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Fri, 2 Aug 2019 10:39:15 -0300 Subject: [PATCH 097/144] powerpc/pseries/hotplug-memory: Change rc variable to bool Change the return variable to bool (matching the function's return type) and avoid doing a ternary operation before returning. Signed-off-by: Leonardo Bras Reviewed-by: David Hildenbrand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190802133914.30413-1-leonardo@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 8e700390f3d6c8..c126b94d194333 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -338,7 +338,7 @@ static int pseries_remove_mem_node(struct device_node *np) static bool lmb_is_removable(struct drmem_lmb *lmb) { int i, scns_per_block; - int rc = 1; + bool rc = true; unsigned long pfn, block_sz; u64 phys_addr; @@ -363,11 +363,11 @@ static bool lmb_is_removable(struct drmem_lmb *lmb) if (!pfn_present(pfn)) continue; - rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION); + rc = rc && is_mem_section_removable(pfn, PAGES_PER_SECTION); phys_addr += MIN_MEMORY_BLOCK_SIZE; } - return rc ? true : false;
+ return rc; } static int dlpar_add_lmb(struct drmem_lmb *); From de84ffc3ccbeec3678f95a3d898fc188efa0d9c5 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Thu, 17 Oct 2019 15:59:37 +1100 Subject: [PATCH 098/144] powerpc/eeh: differentiate duplicate detection message Currently when an EEH error is detected, the system log receives the same (or almost the same) message twice: EEH: PHB#0 failure detected, location: N/A EEH: PHB#0 failure detected, location: N/A or EEH: eeh_dev_check_failure: Frozen PHB#0-PE#0 detected EEH: Frozen PHB#0-PE#0 detected This looks like a bug, but in fact the messages are from different functions and mean slightly different things. So keep both but change one of the messages slightly, so that it's clear they are different: EEH: PHB#0 failure detected, location: N/A EEH: Recovering PHB#0, location: N/A or EEH: eeh_dev_check_failure: Frozen PHB#0-PE#0 detected EEH: Recovering PHB#0-PE#0 Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/43817cb6e6631b0828b9a6e266f60d1f8ca8eb22.1571288375.git.sbobroff@linux.ibm.com --- arch/powerpc/kernel/eeh_driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 516a42666b6b16..3dd1a422fc29dd 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -881,12 +881,12 @@ void eeh_handle_normal_event(struct eeh_pe *pe) /* Log the event */ if (pe->type & EEH_PE_PHB) { - pr_err("EEH: PHB#%x failure detected, location: %s\n", + pr_err("EEH: Recovering PHB#%x, location: %s\n", pe->phb->global_number, eeh_pe_loc_get(pe)); } else { struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb); - pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", + pr_err("EEH: Recovering PHB#%x-PE#%x\n", pe->phb->global_number, pe->addr); pr_err("EEH: PE location: %s, PHB location: %s\n", eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); From 42484d2c0f82b666292faf6668c77b49a3a04bc0 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Thu, 12 Sep 2019 21:46:33 +0200 Subject: [PATCH 099/144] powerpc/perf: remove current_is_64bit() Since commit ed1cd6deb013 ("powerpc: Activate CONFIG_THREAD_INFO_IN_TASK") current_is_64bit() is equivalent to !is_32bit_task(). Remove the redundant function. Suggested-by: Christophe Leroy Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190912194633.12045-1-msuchanek@suse.de --- arch/powerpc/perf/callchain.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index c84bbd4298a043..35d542515faf3d 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -284,16 +284,6 @@ static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, } } -static inline int current_is_64bit(void) -{ - /* - * We can't use test_thread_flag() here because we may be on an - * interrupt stack, and the thread flags don't get copied over - * from the thread_info on the main stack to the interrupt stack.
- */ - return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT); -} - #else /* CONFIG_PPC64 */ /* * On 32-bit we just access the address and let hash_page create a @@ -321,11 +311,6 @@ static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry { } -static inline int current_is_64bit(void) -{ - return 0; -} - static inline int valid_user_sp(unsigned long sp, int is_64) { if (!sp || (sp & 7) || sp > TASK_SIZE - 32) @@ -486,7 +471,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - if (current_is_64bit()) + if (!is_32bit_task()) perf_callchain_user_64(entry, regs); else perf_callchain_user_32(entry, regs); From 565f9bc05e2dad6c7fdfc7c2e641be580aa599cd Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Thu, 7 Nov 2019 17:47:57 +0100 Subject: [PATCH 100/144] powerpc/fadump: when fadump is supported register the fadump sysfs files. Currently, using the kernel sysfs interface, it is not possible to distinguish the case where fadump is supported by firmware but disabled in the kernel from the case where it is completely unsupported. The user can investigate the devicetree, but it is more reasonable to provide the sysfs files, also in case we get some fadumpv2 in the future. With this patch the sysfs files are available whenever fadump is supported by firmware. There is a duplicate message about the lack of firmware support in both fadump_reserve_mem and setup_fadump. Remove the duplicate message in setup_fadump. Signed-off-by: Michal Suchanek Reviewed-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191107164757.15140-1-msuchanek@suse.de --- arch/powerpc/kernel/fadump.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index ed59855430b93c..ff0114aeba9b5c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1466,16 +1466,15 @@ static void fadump_init_files(void) */ int __init setup_fadump(void) { - if (!fw_dump.fadump_enabled) - return 0; - - if (!fw_dump.fadump_supported) { - printk(KERN_ERR "Firmware-assisted dump is not supported on" - " this hardware\n"); + if (!fw_dump.fadump_supported) return 0; - } + fadump_init_files(); fadump_show_config(); + + if (!fw_dump.fadump_enabled) + return 1; + /* * If dump data is available then see if it is valid and prepare for * saving it to the disk. @@ -1492,8 +1491,6 @@ int __init setup_fadump(void) else if (fw_dump.reserve_dump_area_size) fw_dump.ops->fadump_init_mem_struct(&fw_dump); - fadump_init_files(); - return 1; } subsys_initcall(setup_fadump); From 8054df0570588e22007a8be6fa7615462389f27f Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 20 Sep 2019 17:45:35 +0800 Subject: [PATCH 101/144] powerpc: unify definition of M_IF_NEEDED M_IF_NEEDED is defined too many times. Move it to a common place and rename it to MAS2_M_IF_NEEDED, which is more readable.
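For illustration, a hypothetical C-level sketch of how the consolidated macro is meant to be combined (the real users below are assembly; this is not actual kernel code):

  /* compose a MAS2 value for a 64M mapping of vaddr; the mapping is
   * made cache-coherent only when the platform needs it */
  static inline unsigned long mas2_for_64m(unsigned long vaddr)
  {
          return (vaddr & MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)) | MAS2_M_IF_NEEDED;
  }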
Signed-off-by: Jason Yan Reviewed-by: Christophe Leroy Reviewed-by: Diana Craciun Tested-by: Diana Craciun Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/mmu-book3e.h | 10 ++++++++++ arch/powerpc/kernel/exceptions-64e.S | 12 +----------- arch/powerpc/kernel/fsl_booke_entry_mapping.S | 14 ++------------ arch/powerpc/kernel/misc_64.S | 7 +------ 4 files changed, 14 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h index 4c9777d256fbe4..fa3efc2d310fa9 100644 --- a/arch/powerpc/include/asm/nohash/mmu-book3e.h +++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h @@ -221,6 +221,16 @@ #define TLBILX_T_CLASS2 6 #define TLBILX_T_CLASS3 7 +/* + * The mapping only needs to be cache-coherent on SMP, except on + * Freescale e500mc derivatives where it's also needed for coherent DMA. + */ +#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) +#define MAS2_M_IF_NEEDED MAS2_M +#else +#define MAS2_M_IF_NEEDED 0 +#endif + #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 829950b96d2912..e4076e3c072db3 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1346,16 +1346,6 @@ skpinv: addi r6,r6,1 /* Increment */ sync isync -/* - * The mapping only needs to be cache-coherent on SMP, except on - * Freescale e500mc derivatives where it's also needed for coherent DMA. - */ -#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) -#define M_IF_NEEDED MAS2_M -#else -#define M_IF_NEEDED 0 -#endif - /* 6. Setup KERNELBASE mapping in TLB[0] * * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in @@ -1368,7 +1358,7 @@ skpinv: addi r6,r6,1 /* Increment */ ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l mtspr SPRN_MAS1,r6 - LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | M_IF_NEEDED) + LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | MAS2_M_IF_NEEDED) mtspr SPRN_MAS2,r6 rlwinm r5,r5,0,0,25 diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S index ea065282b3035a..f4d3eaae54a935 100644 --- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S +++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S @@ -153,16 +153,6 @@ skpinv: addi r6,r6,1 /* Increment */ tlbivax 0,r9 TLBSYNC -/* - * The mapping only needs to be cache-coherent on SMP, except on - * Freescale e500mc derivatives where it's also needed for coherent DMA. - */ -#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) -#define M_IF_NEEDED MAS2_M -#else -#define M_IF_NEEDED 0 -#endif - #if defined(ENTRY_MAPPING_BOOT_SETUP) /* 6. Setup KERNELBASE mapping in TLB1[0] */ @@ -171,8 +161,8 @@ skpinv: addi r6,r6,1 /* Increment */ lis r6,(MAS1_VALID|MAS1_IPROT)@h ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l mtspr SPRN_MAS1,r6 - lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@h - ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@l + lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, MAS2_M_IF_NEEDED)@h + ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, MAS2_M_IF_NEEDED)@l mtspr SPRN_MAS2,r6 mtspr SPRN_MAS3,r8 tlbwe diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index b55a7b4cb543fd..2062a299a22d3d 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -432,18 +432,13 @@ kexec_create_tlb: rlwimi r9,r10,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r9) */ /* Set up a temp identity mapping v:0 to p:0 and return to it. 
*/ -#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) -#define M_IF_NEEDED MAS2_M -#else -#define M_IF_NEEDED 0 -#endif mtspr SPRN_MAS0,r9 lis r9,(MAS1_VALID|MAS1_IPROT)@h ori r9,r9,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l mtspr SPRN_MAS1,r9 - LOAD_REG_IMMEDIATE(r9, 0x0 | M_IF_NEEDED) + LOAD_REG_IMMEDIATE(r9, 0x0 | MAS2_M_IF_NEEDED) mtspr SPRN_MAS2,r9 LOAD_REG_IMMEDIATE(r9, 0x0 | MAS3_SR | MAS3_SW | MAS3_SX) From 4ed47dbefa299d7b36944f6d4001ee83612dd680 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 20 Sep 2019 17:45:36 +0800 Subject: [PATCH 102/144] powerpc: move memstart_addr and kernstart_addr to init-common.c These two variables are both defined in init_32.c and init_64.c. Move them to init-common.c and make them __ro_after_init. Signed-off-by: Jason Yan Reviewed-by: Christophe Leroy Reviewed-by: Diana Craciun Tested-by: Diana Craciun Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/mm/init-common.c | 5 +++++ arch/powerpc/mm/init_32.c | 5 ----- arch/powerpc/mm/init_64.c | 5 ----- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index a84da92920f797..e223da482c0c35 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -21,6 +21,11 @@ #include #include +phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull; +EXPORT_SYMBOL_GPL(memstart_addr); +phys_addr_t kernstart_addr __ro_after_init; +EXPORT_SYMBOL_GPL(kernstart_addr); + static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index b04896a88d792e..872df48ae41bc2 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -56,11 +56,6 @@ phys_addr_t total_memory; phys_addr_t total_lowmem; -phys_addr_t memstart_addr = (phys_addr_t)~0ull; -EXPORT_SYMBOL(memstart_addr); -phys_addr_t kernstart_addr; -EXPORT_SYMBOL(kernstart_addr); - #ifdef CONFIG_RELOCATABLE /* Used in __va()/__pa() */ long long virt_phys_offset; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 4e08246acd79ad..28b9596b4b9086 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -63,11 +63,6 @@ #include -phys_addr_t memstart_addr = ~0; -EXPORT_SYMBOL_GPL(memstart_addr); -phys_addr_t kernstart_addr; -EXPORT_SYMBOL_GPL(kernstart_addr); - #ifdef CONFIG_SPARSEMEM_VMEMMAP /* * Given an address within the vmemmap, determine the pfn of the page that From 39f4b7bf7571a9c6529b0bb3de49c9bb0998f194 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 20 Sep 2019 17:45:37 +0800 Subject: [PATCH 103/144] powerpc: introduce kernstart_virt_addr to store the kernel base Now the kernel base is a fixed value - KERNELBASE. To support KASLR, we need a variable to store the kernel base. 
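For illustration, once the base is a variable rather than a constant, the randomization shift can be computed from it. A minimal sketch (a later patch in this series adds a kaslr_offset() helper of exactly this shape):

  /* 0 while the kernel still runs at the link-time base */
  unsigned long offset = kernstart_virt_addr - KERNELBASE;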
Signed-off-by: Jason Yan Reviewed-by: Christophe Leroy Reviewed-by: Diana Craciun Tested-by: Diana Craciun Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/page.h | 2 ++ arch/powerpc/mm/init-common.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index c8bb14ff47135d..88fa53f89f5a5f 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -325,6 +325,8 @@ void arch_free_page(struct page *page, int order); struct vm_area_struct; +extern unsigned long kernstart_virt_addr; + #include #endif /* __ASSEMBLY__ */ #include diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index e223da482c0c35..42ef7a6e609803 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -25,6 +25,8 @@ phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull; EXPORT_SYMBOL_GPL(memstart_addr); phys_addr_t kernstart_addr __ro_after_init; EXPORT_SYMBOL_GPL(kernstart_addr); +unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE; +EXPORT_SYMBOL_GPL(kernstart_virt_addr); static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); From aa1d2090e69311c65f69c0fa2311d1d0f01c55f8 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 20 Sep 2019 17:45:38 +0800 Subject: [PATCH 104/144] powerpc/fsl_booke/32: introduce create_kaslr_tlb_entry() helper Add a new helper create_kaslr_tlb_entry() to create a tlb entry by the virtual and physical address. This is a preparation to support boot kernel at a randomized address. Signed-off-by: Jason Yan Reviewed-by: Christophe Leroy Reviewed-by: Diana Craciun Tested-by: Diana Craciun Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/head_fsl_booke.S | 35 ++++++++++++++++++++++++++++ arch/powerpc/mm/mmu_decl.h | 1 + 2 files changed, 36 insertions(+) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index adf0505dbe0222..8c1928176ffed3 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1114,6 +1114,41 @@ __secondary_hold_acknowledge: .long -1 #endif +/* + * Create a 64M tlb by address and entry + * r3 - entry + * r4 - virtual address + * r5/r6 - physical address + */ +_GLOBAL(create_kaslr_tlb_entry) + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */ + mtspr SPRN_MAS0,r7 /* Write MAS0 */ + + lis r3,(MAS1_VALID|MAS1_IPROT)@h + ori r3,r3,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l + mtspr SPRN_MAS1,r3 /* Write MAS1 */ + + lis r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h + ori r3,r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l + and r3,r3,r4 + ori r3,r3,MAS2_M_IF_NEEDED@l + mtspr SPRN_MAS2,r3 /* Write MAS2(EPN) */ + +#ifdef CONFIG_PHYS_64BIT + ori r8,r6,(MAS3_SW|MAS3_SR|MAS3_SX) + mtspr SPRN_MAS3,r8 /* Write MAS3(RPN) */ + mtspr SPRN_MAS7,r5 +#else + ori r8,r5,(MAS3_SW|MAS3_SR|MAS3_SX) + mtspr SPRN_MAS3,r8 /* Write MAS3(RPN) */ +#endif + + tlbwe /* Write TLB */ + isync + sync + blr + /* * Create a tlb entry with the same effective and physical address as * the tlb entry used by the current running code. But set the TS to 1. 
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index c750ac9ec7130e..9da261ad54c31a 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -139,6 +139,7 @@ extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, extern void adjust_total_lowmem(void); extern int switch_to_as1(void); extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu); +void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys); #endif extern void loadcam_entry(unsigned int index); extern void loadcam_multi(int first_idx, int num, int tmp_idx); From c061b38a3e48663c29611e3b60afffe624d7c830 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 20 Sep 2019 17:45:39 +0800 Subject: [PATCH 105/144] powerpc/fsl_booke/32: introduce reloc_kernel_entry() helper Add a new helper reloc_kernel_entry() to jump back to the start of the new kernel. After we put the new kernel in a randomized place, we can use this new helper to enter the kernel and begin to relocate again. Signed-off-by: Jason Yan Reviewed-by: Christophe Leroy Reviewed-by: Diana Craciun Tested-by: Diana Craciun Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/head_fsl_booke.S | 13 +++++++++++++ arch/powerpc/mm/mmu_decl.h | 1 + 2 files changed, 14 insertions(+) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 8c1928176ffed3..d9f599b01ff13e 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1149,6 +1149,19 @@ _GLOBAL(create_kaslr_tlb_entry) sync blr +/* + * Return to the start of the relocated kernel and run again + * r3 - virtual address of fdt + * r4 - entry of the kernel + */ +_GLOBAL(reloc_kernel_entry) + mfmsr r7 + rlwinm r7, r7, 0, ~(MSR_IS | MSR_DS) + + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r7 + rfi + /* * Create a tlb entry with the same effective and physical address as * the tlb entry used by the current running code. But set the TS to 1. diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 9da261ad54c31a..bbfd185fb99afe 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -140,6 +140,7 @@ extern void adjust_total_lowmem(void); extern int switch_to_as1(void); extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu); void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys); +void reloc_kernel_entry(void *fdt, int addr); #endif extern void loadcam_entry(unsigned int index); extern void loadcam_multi(int first_idx, int num, int tmp_idx); From 2b0e86cc5de6dabadc2d64cefa429fc227c8a756 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 20 Sep 2019 17:45:40 +0800 Subject: [PATCH 106/144] powerpc/fsl_booke/32: implement KASLR infrastructure This patch adds support for booting the kernel from places other than KERNELBASE. Since CONFIG_RELOCATABLE is already supported, all we need to do is map or copy the kernel to a proper place and relocate. Freescale Book-E parts expect lowmem to be mapped by fixed TLB entries (TLB1). The TLB1 entries are not suitable to map the kernel directly in a randomized region, so we choose to copy the kernel to a proper place and restart to relocate. The offset of the kernel is not randomized yet (a fixed 64M is used). We will randomize it in the next patch.
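The overall flow this enables can be condensed as follows (a sketch of the kaslr_early_init() added below, not the exact code):

  offset = kaslr_choose_location(dt_ptr, size, kernel_sz); /* fixed SZ_64M for now */
  kernstart_virt_addr += offset;
  kernstart_addr += offset;
  if (offset >= SZ_64M)                  /* target lies outside the current 64M map */
          create_kaslr_tlb_entry(1, tlb_virt, tlb_phys);
  memcpy((void *)kernstart_virt_addr, (void *)_stext, kernel_sz);
  flush_icache_range(kernstart_virt_addr, kernstart_virt_addr + kernel_sz);
  reloc_kernel_entry(dt_ptr, kernstart_virt_addr);         /* restart and relocate */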
Signed-off-by: Jason Yan Tested-by: Diana Craciun Reviewed-by: Christophe Leroy Signed-off-by: Scott Wood [mpe: Use PTRRELOC() in early_init()] Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 11 ++++ arch/powerpc/include/asm/nohash/mmu-book3e.h | 1 - arch/powerpc/kernel/early_32.c | 9 ++- arch/powerpc/kernel/fsl_booke_entry_mapping.S | 15 +++-- arch/powerpc/kernel/head_fsl_booke.S | 13 +++- arch/powerpc/mm/mmu_decl.h | 7 +++ arch/powerpc/mm/nohash/Makefile | 1 + arch/powerpc/mm/nohash/fsl_booke.c | 7 ++- arch/powerpc/mm/nohash/kaslr_booke.c | 62 +++++++++++++++++++ 9 files changed, 109 insertions(+), 17 deletions(-) create mode 100644 arch/powerpc/mm/nohash/kaslr_booke.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3e56c9c2f16eed..4c4a0fcd16742c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -551,6 +551,17 @@ config RELOCATABLE setting can still be useful to bootwrappers that need to know the load address of the kernel (eg. u-boot/mkimage). +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image" + depends on (FSL_BOOKE && FLATMEM && PPC32) + depends on RELOCATABLE + help + Randomizes the virtual address at which the kernel image is + loaded, as a security feature that deters exploit attempts + relying on knowledge of the location of kernel internals. + + If unsure, say Y. + config RELOCATABLE_TEST bool "Test relocatable kernel" depends on (PPC64 && RELOCATABLE) diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h index fa3efc2d310fa9..b410046643126e 100644 --- a/arch/powerpc/include/asm/nohash/mmu-book3e.h +++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h @@ -75,7 +75,6 @@ #define MAS2_E 0x00000001 #define MAS2_WIMGE_MASK 0x0000001f #define MAS2_EPN_MASK(size) (~0 << (size + 10)) -#define MAS2_VAL(addr, size, flags) ((addr) & MAS2_EPN_MASK(size) | (flags)) #define MAS3_RPN 0xFFFFF000 #define MAS3_U0 0x00000200 diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c index 3482118ffe763a..ef2ad49459040d 100644 --- a/arch/powerpc/kernel/early_32.c +++ b/arch/powerpc/kernel/early_32.c @@ -19,10 +19,13 @@ */ notrace unsigned long __init early_init(unsigned long dt_ptr) { - unsigned long offset = reloc_offset(); + unsigned long kva, offset = reloc_offset(); + + kva = *PTRRELOC(&kernstart_virt_addr); /* First zero the BSS */ - memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start); + if (kva == KERNELBASE) + memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start); /* * Identify the CPU type and fix up code sections @@ -32,5 +35,5 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) apply_feature_fixups(); - return KERNELBASE + offset; + return kva + offset; } diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S index f4d3eaae54a935..8bccce6544b5ea 100644 --- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S +++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S @@ -155,23 +155,22 @@ skpinv: addi r6,r6,1 /* Increment */ #if defined(ENTRY_MAPPING_BOOT_SETUP) -/* 6. Setup KERNELBASE mapping in TLB1[0] */ +/* 6. 
Setup kernstart_virt_addr mapping in TLB1[0] */ lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */ mtspr SPRN_MAS0,r6 lis r6,(MAS1_VALID|MAS1_IPROT)@h ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l mtspr SPRN_MAS1,r6 - lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, MAS2_M_IF_NEEDED)@h - ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, MAS2_M_IF_NEEDED)@l + lis r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h + ori r6,r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l + and r6,r6,r20 + ori r6,r6,MAS2_M_IF_NEEDED@l mtspr SPRN_MAS2,r6 mtspr SPRN_MAS3,r8 tlbwe -/* 7. Jump to KERNELBASE mapping */ - lis r6,(KERNELBASE & ~0xfff)@h - ori r6,r6,(KERNELBASE & ~0xfff)@l - rlwinm r7,r25,0,0x03ffffff - add r6,r7,r6 +/* 7. Jump to kernstart_virt_addr mapping */ + mr r6,r20 #elif defined(ENTRY_MAPPING_KEXEC_SETUP) /* diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index d9f599b01ff13e..838d9d4650c751 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -155,6 +155,8 @@ _ENTRY(_start); */ _ENTRY(__early_start) + LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr) + lwz r20,0(r20) #define ENTRY_MAPPING_BOOT_SETUP #include "fsl_booke_entry_mapping.S" @@ -277,8 +279,8 @@ set_ivor: ori r6, r6, swapper_pg_dir@l lis r5, abatron_pteptrs@h ori r5, r5, abatron_pteptrs@l - lis r4, KERNELBASE@h - ori r4, r4, KERNELBASE@l + lis r3, kernstart_virt_addr@ha + lwz r4, kernstart_virt_addr@l(r3) stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */ stw r6, 0(r5) @@ -1067,7 +1069,12 @@ __secondary_start: mr r5,r25 /* phys kernel start */ rlwinm r5,r5,0,~0x3ffffff /* aligned 64M */ subf r4,r5,r4 /* memstart_addr - phys kernel start */ - li r5,0 /* no device tree */ + lis r7,KERNELBASE@h + ori r7,r7,KERNELBASE@l + cmpw r20,r7 /* if kernstart_virt_addr != KERNELBASE, randomized */ + beq 2f + li r4,0 +2: li r5,0 /* no device tree */ li r6,0 /* not boot cpu */ bl restore_to_as0 diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index bbfd185fb99afe..ae06c5675abbb1 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -141,10 +141,17 @@ extern int switch_to_as1(void); extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu); void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys); void reloc_kernel_entry(void *fdt, int addr); +extern int is_second_reloc; #endif extern void loadcam_entry(unsigned int index); extern void loadcam_multi(int first_idx, int num, int tmp_idx); +#ifdef CONFIG_RANDOMIZE_BASE +void kaslr_early_init(void *dt_ptr, phys_addr_t size); +#else +static inline void kaslr_early_init(void *dt_ptr, phys_addr_t size) {} +#endif + struct tlbcam { u32 MAS0; u32 MAS1; diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile index 33b6f6f29d3f25..0424f6ce5bd80c 100644 --- a/arch/powerpc/mm/nohash/Makefile +++ b/arch/powerpc/mm/nohash/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_40x) += 40x.o obj-$(CONFIG_44x) += 44x.o obj-$(CONFIG_PPC_8xx) += 8xx.o obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke.o +obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_booke.o ifdef CONFIG_HUGETLB_PAGE obj-$(CONFIG_PPC_FSL_BOOK3E) += book3e_hugetlbpage.o endif diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c index 556e3cd52a35f4..2dc27cf88add00 100644 --- a/arch/powerpc/mm/nohash/fsl_booke.c +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -263,7 +263,8 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, int __initdata is_second_reloc; notrace void __init 
relocate_init(u64 dt_ptr, phys_addr_t start) { - unsigned long base = KERNELBASE; + unsigned long base = kernstart_virt_addr; + phys_addr_t size; kernstart_addr = start; if (is_second_reloc) { @@ -291,7 +292,7 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) start &= ~0x3ffffff; base &= ~0x3ffffff; virt_phys_offset = base - start; - early_get_first_memblock_info(__va(dt_ptr), NULL); + early_get_first_memblock_info(__va(dt_ptr), &size); /* * We now get the memstart_addr, then we should check if this * address is the same as what the PAGE_OFFSET map to now. If @@ -316,6 +317,8 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) /* We should never reach here */ panic("Relocation error"); } + + kaslr_early_init(__va(dt_ptr), size); } #endif #endif diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c new file mode 100644 index 00000000000000..29c1567d8d407c --- /dev/null +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// Copyright (C) 2019 Jason Yan +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size, + unsigned long kernel_sz) +{ + /* return a fixed offset of 64M for now */ + return SZ_64M; +} + +/* + * To see if we need to relocate the kernel to a random offset + * void *dt_ptr - address of the device tree + * phys_addr_t size - size of the first memory block + */ +notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size) +{ + unsigned long tlb_virt; + phys_addr_t tlb_phys; + unsigned long offset; + unsigned long kernel_sz; + + kernel_sz = (unsigned long)_end - (unsigned long)_stext; + + offset = kaslr_choose_location(dt_ptr, size, kernel_sz); + if (offset == 0) + return; + + kernstart_virt_addr += offset; + kernstart_addr += offset; + + is_second_reloc = 1; + + if (offset >= SZ_64M) { + tlb_virt = round_down(kernstart_virt_addr, SZ_64M); + tlb_phys = round_down(kernstart_addr, SZ_64M); + + /* Create kernel map to relocate in */ + create_kaslr_tlb_entry(1, tlb_virt, tlb_phys); + } + + /* Copy the kernel to its new location and run */ + memcpy((void *)kernstart_virt_addr, (void *)_stext, kernel_sz); + flush_icache_range(kernstart_virt_addr, kernstart_virt_addr + kernel_sz); + + reloc_kernel_entry(dt_ptr, kernstart_virt_addr); +} From 6a38ea1d7b94c6c84dbf3f5c969be5e3648d9a70 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 20 Sep 2019 17:45:41 +0800 Subject: [PATCH 107/144] powerpc/fsl_booke/32: randomize the kernel image offset Now that we have basic support for relocating the kernel to an appropriate place, we can start to randomize the offset. Entropy is derived from the banner and the timer, which change with every build and every boot. This is not particularly strong on its own, so the bootloader may additionally pass entropy via the /chosen/kaslr-seed node in the device tree. We will use the first 512M of the low memory to randomize the kernel image. The memory will be split in 64M zones. We will use the lower 8 bits of the entropy to decide the index of the 64M zone. Then we choose a 16K-aligned offset inside the 64M zone to put the kernel in. We also check that we do not overlap with areas like the dtb area, the initrd area or the crashkernel area. If we cannot find a proper area, KASLR is disabled and we boot from the original location.
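As a worked example of the placement math (numbers purely illustrative): with a 512M linear mapping there are 512M / 64M = 8 candidate zones. If the low 8 bits of the mixed seed are 42, then:

  index  = 42 % 8 = 2                                    /* the third 64M zone */
  offset = (seed % (SZ_64M - kernel_sz)) & ~(SZ_16K - 1) /* 16K-aligned slot   */

so the image is placed at roughly memstart_addr + index * 64M + offset, provided the overlap checks against the dtb, initrd, crashkernel and reserved regions pass.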
Some pieces of code are derived from arch/x86/boot/compressed/kaslr.c or arch/arm64/kernel/kaslr.c, such as rotate_xor(). Credit goes to Kees and Ard. Signed-off-by: Jason Yan Reviewed-by: Diana Craciun Tested-by: Diana Craciun Reviewed-by: Christophe Leroy Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/mm/nohash/kaslr_booke.c | 325 ++++++++++++++++++++++++++- 1 file changed, 323 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 29c1567d8d407c..7b238fc2c8a936 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -12,15 +12,336 @@ #include #include #include +#include +#include #include #include +#include #include +#include +#include + +struct regions { + unsigned long pa_start; + unsigned long pa_end; + unsigned long kernel_size; + unsigned long dtb_start; + unsigned long dtb_end; + unsigned long initrd_start; + unsigned long initrd_end; + unsigned long crash_start; + unsigned long crash_end; + int reserved_mem; + int reserved_mem_addr_cells; + int reserved_mem_size_cells; +}; + +/* Simplified build-specific string for starting entropy. */ +static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; + +struct regions __initdata regions; + +static __init void kaslr_get_cmdline(void *fdt) +{ + int node = fdt_path_offset(fdt, "/chosen"); + + early_init_dt_scan_chosen(node, "chosen", 1, boot_command_line); +} + +static unsigned long __init rotate_xor(unsigned long hash, const void *area, + size_t size) +{ + size_t i; + const unsigned long *ptr = area; + + for (i = 0; i < size / sizeof(hash); i++) { + /* Rotate by odd number of bits and XOR. */ + hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); + hash ^= ptr[i]; + } + + return hash; +} + +/* Attempt to create a simple starting entropy. This can make it different for + * every build but it is still not enough. Stronger entropy should
+ */ +static unsigned long __init get_boot_seed(void *fdt) +{ + unsigned long hash = 0; + + hash = rotate_xor(hash, build_str, sizeof(build_str)); + hash = rotate_xor(hash, fdt, fdt_totalsize(fdt)); + + return hash; +} + +static __init u64 get_kaslr_seed(void *fdt) +{ + int node, len; + fdt64_t *prop; + u64 ret; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return 0; + + prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); + if (!prop || len != sizeof(u64)) + return 0; + + ret = fdt64_to_cpu(*prop); + *prop = 0; + return ret; +} + +static __init bool regions_overlap(u32 s1, u32 e1, u32 s2, u32 e2) +{ + return e1 >= s2 && e2 >= s1; +} + +static __init bool overlaps_reserved_region(const void *fdt, u32 start, + u32 end) +{ + int subnode, len, i; + u64 base, size; + + /* check for overlap with /memreserve/ entries */ + for (i = 0; i < fdt_num_mem_rsv(fdt); i++) { + if (fdt_get_mem_rsv(fdt, i, &base, &size) < 0) + continue; + if (regions_overlap(start, end, base, base + size)) + return true; + } + + if (regions.reserved_mem < 0) + return false; + + /* check for overlap with static reservations in /reserved-memory */ + for (subnode = fdt_first_subnode(fdt, regions.reserved_mem); + subnode >= 0; + subnode = fdt_next_subnode(fdt, subnode)) { + const fdt32_t *reg; + u64 rsv_end; + + len = 0; + reg = fdt_getprop(fdt, subnode, "reg", &len); + while (len >= (regions.reserved_mem_addr_cells + + regions.reserved_mem_size_cells)) { + base = fdt32_to_cpu(reg[0]); + if (regions.reserved_mem_addr_cells == 2) + base = (base << 32) | fdt32_to_cpu(reg[1]); + + reg += regions.reserved_mem_addr_cells; + len -= 4 * regions.reserved_mem_addr_cells; + + size = fdt32_to_cpu(reg[0]); + if (regions.reserved_mem_size_cells == 2) + size = (size << 32) | fdt32_to_cpu(reg[1]); + + reg += regions.reserved_mem_size_cells; + len -= 4 * regions.reserved_mem_size_cells; + + if (base >= regions.pa_end) + continue; + + rsv_end = min(base + size, (u64)U32_MAX); + + if (regions_overlap(start, end, base, rsv_end)) + return true; + } + } + return false; +} + +static __init bool overlaps_region(const void *fdt, u32 start, + u32 end) +{ + if (regions_overlap(start, end, __pa(_stext), __pa(_end))) + return true; + + if (regions_overlap(start, end, regions.dtb_start, + regions.dtb_end)) + return true; + + if (regions_overlap(start, end, regions.initrd_start, + regions.initrd_end)) + return true; + + if (regions_overlap(start, end, regions.crash_start, + regions.crash_end)) + return true; + + return overlaps_reserved_region(fdt, start, end); +} + +static void __init get_crash_kernel(void *fdt, unsigned long size) +{ +#ifdef CONFIG_CRASH_CORE + unsigned long long crash_size, crash_base; + int ret; + + ret = parse_crashkernel(boot_command_line, size, &crash_size, + &crash_base); + if (ret != 0 || crash_size == 0) + return; + if (crash_base == 0) + crash_base = KDUMP_KERNELBASE; + + regions.crash_start = (unsigned long)crash_base; + regions.crash_end = (unsigned long)(crash_base + crash_size); + + pr_debug("crash_base=0x%llx crash_size=0x%llx\n", crash_base, crash_size); +#endif +} + +static void __init get_initrd_range(void *fdt) +{ + u64 start, end; + int node, len; + const __be32 *prop; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return; + + prop = fdt_getprop(fdt, node, "linux,initrd-start", &len); + if (!prop) + return; + start = of_read_number(prop, len / 4); + + prop = fdt_getprop(fdt, node, "linux,initrd-end", &len); + if (!prop) + return; + end = of_read_number(prop, len / 4); + + 
regions.initrd_start = (unsigned long)start; + regions.initrd_end = (unsigned long)end; + + pr_debug("initrd_start=0x%llx initrd_end=0x%llx\n", start, end); +} + +static __init unsigned long get_usable_address(const void *fdt, + unsigned long start, + unsigned long offset) +{ + unsigned long pa; + unsigned long pa_end; + + for (pa = offset; (long)pa > (long)start; pa -= SZ_16K) { + pa_end = pa + regions.kernel_size; + if (overlaps_region(fdt, pa, pa_end)) + continue; + + return pa; + } + return 0; +} + +static __init void get_cell_sizes(const void *fdt, int node, int *addr_cells, + int *size_cells) +{ + const int *prop; + int len; + + /* + * Retrieve the #address-cells and #size-cells properties + * from the 'node', or use the default if not provided. + */ + *addr_cells = *size_cells = 1; + + prop = fdt_getprop(fdt, node, "#address-cells", &len); + if (len == 4) + *addr_cells = fdt32_to_cpu(*prop); + prop = fdt_getprop(fdt, node, "#size-cells", &len); + if (len == 4) + *size_cells = fdt32_to_cpu(*prop); +} + +static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long index, + unsigned long offset) +{ + unsigned long koffset = 0; + unsigned long start; + + while ((long)index >= 0) { + offset = memstart_addr + index * SZ_64M + offset; + start = memstart_addr + index * SZ_64M; + koffset = get_usable_address(dt_ptr, start, offset); + if (koffset) + break; + index--; + } + + if (koffset != 0) + koffset -= memstart_addr; + + return koffset; +} static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size, unsigned long kernel_sz) { - /* return a fixed offset of 64M for now */ - return SZ_64M; + unsigned long offset, random; + unsigned long ram, linear_sz; + u64 seed; + unsigned long index; + + kaslr_get_cmdline(dt_ptr); + + random = get_boot_seed(dt_ptr); + + seed = get_tb() << 32; + seed ^= get_tb(); + random = rotate_xor(random, &seed, sizeof(seed)); + + /* + * Retrieve (and wipe) the seed from the FDT + */ + seed = get_kaslr_seed(dt_ptr); + if (seed) + random = rotate_xor(random, &seed, sizeof(seed)); + else + pr_warn("KASLR: No safe seed for randomizing the kernel base.\n"); + + ram = min_t(phys_addr_t, __max_low_memory, size); + ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true); + linear_sz = min_t(unsigned long, ram, SZ_512M); + + /* If the linear size is smaller than 64M, do not randmize */ + if (linear_sz < SZ_64M) + return 0; + + /* check for a reserved-memory node and record its cell sizes */ + regions.reserved_mem = fdt_path_offset(dt_ptr, "/reserved-memory"); + if (regions.reserved_mem >= 0) + get_cell_sizes(dt_ptr, regions.reserved_mem, + ®ions.reserved_mem_addr_cells, + ®ions.reserved_mem_size_cells); + + regions.pa_start = memstart_addr; + regions.pa_end = memstart_addr + linear_sz; + regions.dtb_start = __pa(dt_ptr); + regions.dtb_end = __pa(dt_ptr) + fdt_totalsize(dt_ptr); + regions.kernel_size = kernel_sz; + + get_initrd_range(dt_ptr); + get_crash_kernel(dt_ptr, ram); + + /* + * Decide which 64M we want to start + * Only use the low 8 bits of the random seed + */ + index = random & 0xFF; + index %= linear_sz / SZ_64M; + + /* Decide offset inside 64M */ + offset = random % (SZ_64M - kernel_sz); + offset = round_down(offset, SZ_16K); + + return kaslr_legal_offset(dt_ptr, index, offset); } /* From b39609720069f5a6eed2b3e3f618c23587021ff5 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 20 Sep 2019 17:45:42 +0800 Subject: [PATCH 108/144] powerpc/fsl_booke/kaslr: clear the original kernel if randomized The original kernel still exists in 
memory, so clear it now. Signed-off-by: Jason Yan Reviewed-by: Christophe Leroy Reviewed-by: Diana Craciun Tested-by: Diana Craciun Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/mm/mmu_decl.h | 2 ++ arch/powerpc/mm/nohash/fsl_booke.c | 1 + arch/powerpc/mm/nohash/kaslr_booke.c | 11 +++++++++++ 3 files changed, 14 insertions(+) diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index ae06c5675abbb1..8e99649c24fc4d 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -148,8 +148,10 @@ extern void loadcam_multi(int first_idx, int num, int tmp_idx); #ifdef CONFIG_RANDOMIZE_BASE void kaslr_early_init(void *dt_ptr, phys_addr_t size); +void kaslr_late_init(void); #else static inline void kaslr_early_init(void *dt_ptr, phys_addr_t size) {} +static inline void kaslr_late_init(void) {} #endif struct tlbcam { diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c index 2dc27cf88add00..b4eb06ceb1892a 100644 --- a/arch/powerpc/mm/nohash/fsl_booke.c +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -269,6 +269,7 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) kernstart_addr = start; if (is_second_reloc) { virt_phys_offset = PAGE_OFFSET - memstart_addr; + kaslr_late_init(); return; } diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 7b238fc2c8a936..aa1b60c782e75e 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -381,3 +381,14 @@ notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size) reloc_kernel_entry(dt_ptr, kernstart_virt_addr); } + +void __init kaslr_late_init(void) +{ + /* If randomized, clear the original kernel */ + if (kernstart_virt_addr != KERNELBASE) { + unsigned long kernel_sz; + + kernel_sz = (unsigned long)_end - kernstart_virt_addr; + memzero_explicit((void *)KERNELBASE, kernel_sz); + } +} From 8c2ae87be5a4bb2d3cadff72d3aa5f1e3d5aac2b Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 20 Sep 2019 17:45:43 +0800 Subject: [PATCH 109/144] powerpc/fsl_booke/kaslr: support nokaslr cmdline parameter One may want to disable KASLR at boot time, so provide a cmdline parameter 'nokaslr' to support this.
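For example (the bootloader side is illustrative; any way of editing the kernel command line works), the parameter could be appended from U-Boot before booting:

  => setenv bootargs "${bootargs} nokaslr"
  => boot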
Signed-off-by: Jason Yan Reviewed-by: Diana Craciun Tested-by: Diana Craciun Reviewed-by: Christophe Leroy Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/mm/nohash/kaslr_booke.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index aa1b60c782e75e..4a75f2d9bf0e0f 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -281,6 +281,11 @@ static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long index return koffset; } +static inline __init bool kaslr_disabled(void) +{ + return strstr(boot_command_line, "nokaslr") != NULL; +} + static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size, unsigned long kernel_sz) { @@ -290,6 +295,8 @@ static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size unsigned long index; kaslr_get_cmdline(dt_ptr); + if (kaslr_disabled()) + return 0; random = get_boot_seed(dt_ptr); From 921a79b7802078fab3787c7eae561536906cb8f3 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 20 Sep 2019 17:45:44 +0800 Subject: [PATCH 110/144] powerpc/fsl_booke/kaslr: dump out kernel offset information on panic When KASLR is enabled, the kernel offset is different for every boot. This makes the kernel more difficult to debug. Dump out the kernel offset on panic so that we can debug the kernel more easily. This code is derived from x86/arm64, which have similar functionality. Signed-off-by: Jason Yan Reviewed-by: Christophe Leroy Reviewed-by: Diana Craciun Tested-by: Diana Craciun Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/page.h | 5 +++++ arch/powerpc/kernel/setup-common.c | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 88fa53f89f5a5f..f2f3ed5a89694f 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -327,6 +327,11 @@ struct vm_area_struct; extern unsigned long kernstart_virt_addr; +static inline unsigned long kaslr_offset(void) +{ + return kernstart_virt_addr - KERNELBASE; +} + #include #endif /* __ASSEMBLY__ */ #include diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 25aaa390300091..488f1eecc0deb0 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -715,8 +715,28 @@ static struct notifier_block ppc_panic_block = { .priority = INT_MIN /* may not return; must be done last */ }; +/* + * Dump out kernel offset information on panic.
+ */ +static int dump_kernel_offset(struct notifier_block *self, unsigned long v, + void *p) +{ + pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n", + kaslr_offset(), KERNELBASE); + + return 0; +} + +static struct notifier_block kernel_offset_notifier = { + .notifier_call = dump_kernel_offset +}; + void __init setup_panic(void) { + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0) + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_offset_notifier); + /* PPC64 always does a hard irq disable in its panic handler */ if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic) return; From 74277f00b23263066772fd9e9106acb6a280f84f Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 20 Sep 2019 17:45:45 +0800 Subject: [PATCH 111/144] powerpc/fsl_booke/kaslr: export offset in VMCOREINFO ELF notes Like other architectures such as x86 and arm64, include the KASLR offset in the VMCOREINFO ELF notes to assist in debugging. After this, we can use the crash --kaslr option to parse a vmcore generated from a KASLR kernel. Note: The crash tool needs to support --kaslr too. Signed-off-by: Jason Yan Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/machine_kexec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index c4ed328a7b963f..078fe3d76feb6b 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -86,6 +86,7 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_STRUCT_SIZE(mmu_psize_def); VMCOREINFO_OFFSET(mmu_psize_def, shift); #endif + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); } /* From c2d1a13520eee7f0ac64ffb94f8756006320e4b8 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 20 Sep 2019 17:45:46 +0800 Subject: [PATCH 112/144] powerpc/fsl_booke/32: Document KASLR implementation Add a document to explain how we implement KASLR for fsl_booke32. Signed-off-by: Jason Yan Signed-off-by: Scott Wood [mpe: Add it to the index as well] Signed-off-by: Michael Ellerman --- Documentation/powerpc/index.rst | 1 + Documentation/powerpc/kaslr-booke32.rst | 42 +++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 Documentation/powerpc/kaslr-booke32.rst diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst index db7b6a880f522b..ba5edb3211c058 100644 --- a/Documentation/powerpc/index.rst +++ b/Documentation/powerpc/index.rst @@ -19,6 +19,7 @@ powerpc firmware-assisted-dump hvcs isa-versions + kaslr-booke32 mpc52xx pci_iov_resource_on_powernv pmu-ebb diff --git a/Documentation/powerpc/kaslr-booke32.rst b/Documentation/powerpc/kaslr-booke32.rst new file mode 100644 index 00000000000000..8b259fdfdf03bb --- /dev/null +++ b/Documentation/powerpc/kaslr-booke32.rst @@ -0,0 +1,42 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +KASLR for Freescale BookE32 +=========================== + +The word KASLR stands for Kernel Address Space Layout Randomization. + +This document tries to explain the implementation of KASLR for +Freescale BookE32. KASLR is a security feature that deters exploit +attempts relying on knowledge of the location of kernel internals. + +Since CONFIG_RELOCATABLE is already supported, what we need to do is +map or copy the kernel to a proper place and relocate. Freescale Book-E +parts expect lowmem to be mapped by fixed TLB entries (TLB1).
The TLB1 +entries are not suitable to map the kernel directly in a randomized +region, so we choose to copy the kernel to a proper place and restart to +relocate. + +Entropy is derived from the banner and timer base, which will change every +build and boot. This is not so safe, so additionally the bootloader may +pass entropy via the /chosen/kaslr-seed node in the device tree. + +We will use the first 512M of the low memory to randomize the kernel +image. The memory will be split in 64M zones. We will use the lower 8 +bits of the entropy to decide the index of the 64M zone. Then we choose a +16K aligned offset inside the 64M zone to put the kernel in:: + + KERNELBASE + + |--> 64M <--| + | | + +---------------+ +----------------+---------------+ + | |....| |kernel| | | + +---------------+ +----------------+---------------+ + | | + |-----> offset <-----| + + kernstart_virt_addr + +To enable KASLR, set CONFIG_RANDOMIZE_BASE = y. If KASLR is enabled and you +want to disable it at runtime, add "nokaslr" to the kernel cmdline. From 0695f8bca93ea0c57f0e8e21b4b4db70183b3d1c Mon Sep 17 00:00:00 2001 From: Harish Date: Wed, 13 Nov 2019 15:12:19 +0530 Subject: [PATCH 113/144] selftests/powerpc: Handle Makefile for unrecognized option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On older distributions like Sles12SP5, gcc does not recognize the -no-pie option, making the powerpc selftests build fail. Fixes the following: gcc: error: unrecognized command line option ‘-no-pie’ Signed-off-by: Harish Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191113094219.14946-1-harish@linux.ibm.com --- tools/testing/selftests/powerpc/pmu/ebb/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index 23f4caf48ffc6b..417306353e07bb 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +include ../../../../../../scripts/Kbuild.include + noarg: $(MAKE) -C ../../ @@ -6,7 +8,10 @@ noarg: CFLAGS += -m64 # Toolchains may build PIE by default which breaks the assembly -LDFLAGS += -no-pie +no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ + $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + +LDFLAGS += $(no-pie-option) TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ cycles_with_freeze_test pmc56_overflow_test \ From ea67a5519d61f14517f9ee35b3151f44202c2023 Mon Sep 17 00:00:00 2001 From: Valentin Longchamp Date: Sun, 14 Jul 2019 22:05:01 +0200 Subject: [PATCH 114/144] powerpc/kmcent2: update the ethernet devices' phy properties Change all phy-connection-type properties to phy-mode, which is better supported by the fman driver. Use the more readable fixed-link node for the 2 sgmii links. Change the RGMII link to rgmii-id as the clock delays are added by the phy.
Signed-off-by: Valentin Longchamp Acked-by: Madalin Bucur Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/kmcent2.dts | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/kmcent2.dts b/arch/powerpc/boot/dts/fsl/kmcent2.dts index 48b7f9797124c4..c3e0741cafb1e5 100644 --- a/arch/powerpc/boot/dts/fsl/kmcent2.dts +++ b/arch/powerpc/boot/dts/fsl/kmcent2.dts @@ -210,13 +210,19 @@ fman@400000 { ethernet@e0000 { - fixed-link = <0 1 1000 0 0>; - phy-connection-type = "sgmii"; + phy-mode = "sgmii"; + fixed-link { + speed = <1000>; + full-duplex; + }; }; ethernet@e2000 { - fixed-link = <1 1 1000 0 0>; - phy-connection-type = "sgmii"; + phy-mode = "sgmii"; + fixed-link { + speed = <1000>; + full-duplex; + }; }; ethernet@e4000 { @@ -229,7 +235,7 @@ ethernet@e8000 { phy-handle = <&front_phy>; - phy-connection-type = "rgmii"; + phy-mode = "rgmii-id"; }; mdio0: mdio@fc000 { From 3e4282e484b3b45f1de3f2e9e6c8b192d3ac9fcf Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 23 Oct 2019 14:54:48 +0200 Subject: [PATCH 115/144] powerpc/85xx: remove mostly pointless mpc85xx_qe_init() Since commit 302c059f2e7b (QE: use subsys_initcall to init qe), mpc85xx_qe_init() has done nothing apart from possibly emitting a pr_err(). As part of reducing the amount of QE-related code in arch/powerpc/ (and eventually support QE on other architectures), remove this low-hanging fruit. Signed-off-by: Rasmus Villemoes Signed-off-by: Scott Wood --- arch/powerpc/platforms/85xx/common.c | 23 ------------------- arch/powerpc/platforms/85xx/corenet_generic.c | 2 -- arch/powerpc/platforms/85xx/mpc85xx.h | 2 -- arch/powerpc/platforms/85xx/mpc85xx_mds.c | 1 - arch/powerpc/platforms/85xx/mpc85xx_rdb.c | 1 - arch/powerpc/platforms/85xx/twr_p102x.c | 1 - 6 files changed, 30 deletions(-) diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index fe0606439b5aec..a554b6d87cf76a 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -86,29 +86,6 @@ void __init mpc85xx_cpm2_pic_init(void) #endif #ifdef CONFIG_QUICC_ENGINE -void __init mpc85xx_qe_init(void) -{ - struct device_node *np; - - np = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!np) { - np = of_find_node_by_name(NULL, "qe"); - if (!np) { - pr_err("%s: Could not find Quicc Engine node\n", - __func__); - return; - } - } - - if (!of_device_is_available(np)) { - of_node_put(np); - return; - } - - of_node_put(np); - -} - void __init mpc85xx_qe_par_io_init(void) { struct device_node *np; diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 7ee2c6628f6462..a328a741b45728 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -66,8 +66,6 @@ void __init corenet_gen_setup_arch(void) swiotlb_detect_4g(); pr_info("%s board\n", ppc_md.name); - - mpc85xx_qe_init(); } static const struct of_device_id of_device_ids[] = { diff --git a/arch/powerpc/platforms/85xx/mpc85xx.h b/arch/powerpc/platforms/85xx/mpc85xx.h index fa23f9b0592ca4..cb84c5c56c361c 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx.h +++ b/arch/powerpc/platforms/85xx/mpc85xx.h @@ -10,10 +10,8 @@ static inline void __init mpc85xx_cpm2_pic_init(void) {} #endif /* CONFIG_CPM2 */ #ifdef CONFIG_QUICC_ENGINE -extern void mpc85xx_qe_init(void); extern void mpc85xx_qe_par_io_init(void); #else -static inline void __init mpc85xx_qe_init(void) {} static inline void __init 
mpc85xx_qe_par_io_init(void) {} #endif diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index 5ca254256c475e..120633f99ea6da 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -238,7 +238,6 @@ static void __init mpc85xx_mds_qe_init(void) { struct device_node *np; - mpc85xx_qe_init(); mpc85xx_qe_par_io_init(); mpc85xx_mds_reset_ucc_phys(); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c index d3c540ee558ff5..7f9a84f8576647 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c @@ -89,7 +89,6 @@ static void __init mpc85xx_rdb_setup_arch(void) fsl_pci_assign_primary(); #ifdef CONFIG_QUICC_ENGINE - mpc85xx_qe_init(); mpc85xx_qe_par_io_init(); #if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE) if (machine_is(p1025_rdb)) { diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c index 720b0c0f03bafd..6c3c0cdaee9ad2 100644 --- a/arch/powerpc/platforms/85xx/twr_p102x.c +++ b/arch/powerpc/platforms/85xx/twr_p102x.c @@ -72,7 +72,6 @@ static void __init twr_p1025_setup_arch(void) fsl_pci_assign_primary(); #ifdef CONFIG_QUICC_ENGINE - mpc85xx_qe_init(); mpc85xx_qe_par_io_init(); #if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE) From 3a0990ca1a00cb9fd0278410b9b71a670ffcbffc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 25 Oct 2019 11:29:01 +0200 Subject: [PATCH 116/144] powerpc/booke: Spelling s/date/data/ Caching dates is never a good idea ;-) Fixes: e7affb1dba0e9068 ("powerpc/cache: add cache flush operation for various e500") Signed-off-by: Geert Uytterhoeven Signed-off-by: Scott Wood --- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 2b4f3ec0acf7d9..1d308780e0d31e 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -231,7 +231,7 @@ _GLOBAL(__setup_cpu_e5500) blr #endif -/* flush L1 date cache, it can apply to e500v2, e500mc and e5500 */ +/* flush L1 data cache, it can apply to e500v2, e500mc and e5500 */ _GLOBAL(flush_dcache_L1) mfmsr r10 wrteei 0 From a76bea0287ce13d28494b19649d80d8ee5e7b757 Mon Sep 17 00:00:00 2001 From: Valentin Longchamp Date: Tue, 12 Nov 2019 20:56:23 +0100 Subject: [PATCH 117/144] powerpc/kmcent2: add ranges to the pci bridges This removes the warnings about the fact that the 4 pci bridges (i.e. the 4 pci hosts) don't have any ranges. 
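As background for the hunks below: each entry of the new ranges
properties is seven cells, i.e. three PCI (child) address cells, two CPU
(parent) address cells and two size cells. A self-contained sketch of
how one such entry decodes (illustration only, plain C, not kernel
code):

    #include <stdint.h>
    #include <stdio.h>

    struct pci_range { uint32_t flags; uint64_t pci_addr, cpu_addr, size; };

    /* Decode one 3+2+2-cell entry of a PCI host bridge ranges property */
    static struct pci_range decode_range(const uint32_t c[7])
    {
    	struct pci_range r = {
    		.flags    = c[0], /* 0x02000000: 32-bit MEM, 0x01000000: I/O */
    		.pci_addr = ((uint64_t)c[1] << 32) | c[2],
    		.cpu_addr = ((uint64_t)c[3] << 32) | c[4],
    		.size     = ((uint64_t)c[5] << 32) | c[6],
    	};
    	return r;
    }

    int main(void)
    {
    	/* First entry of pci1's new ranges in the hunk below */
    	const uint32_t cells[7] = { 0x02000000, 0x0, 0xe0000000,
    				    0xc, 0x10000000, 0x0, 0x10000000 };
    	struct pci_range r = decode_range(cells);

    	/* 256 MiB of PCI MEM at PCI 0xe0000000 -> CPU 0xc10000000 */
    	printf("pci %#llx -> cpu %#llx, size %#llx\n",
    	       (unsigned long long)r.pci_addr,
    	       (unsigned long long)r.cpu_addr,
    	       (unsigned long long)r.size);
    	return 0;
    }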
Signed-off-by: Valentin Longchamp
Signed-off-by: Scott Wood
---
 arch/powerpc/boot/dts/fsl/kmcent2.dts | 36 +++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/arch/powerpc/boot/dts/fsl/kmcent2.dts b/arch/powerpc/boot/dts/fsl/kmcent2.dts
index c3e0741cafb1e5..8e7f0828af295b 100644
--- a/arch/powerpc/boot/dts/fsl/kmcent2.dts
+++ b/arch/powerpc/boot/dts/fsl/kmcent2.dts
@@ -264,14 +264,50 @@
 pci1: pcie@ffe250000 {
 status = "disabled";
+ reg = <0xf 0xfe250000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+ 0x01000000 0 0 0xf 0xf8010000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
 };

 pci2: pcie@ffe260000 {
 status = "disabled";
+ reg = <0xf 0xfe260000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
 };

 pci3: pcie@ffe270000 {
 status = "disabled";
+ reg = <0xf 0xfe270000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
 };

 qe: qe@ffe140000 {

From 43f003bb74b9b27da6e719cfc2f7630f5652665a Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Mon, 19 Aug 2019 13:06:30 +0000
Subject: [PATCH 118/144] powerpc: Refactor BUG/WARN macros

BUG(), WARN() and friends use similar inline assembly to implement
various traps with various flags. Let's refactor them via a new
BUG_ENTRY() macro.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/c19a82b37677ace0eebb0dc8c2120373c29c8dd1.1566219503.git.christophe.leroy@c-s.fr
---
 arch/powerpc/include/asm/bug.h | 41 +++++++++++++---------------------
 1 file changed, 15 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index f47e6ff6554d0f..338f36cd9934bc 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -49,6 +49,15 @@
 ".previous\n"
 #endif

+#define BUG_ENTRY(insn, flags, ...) \
+ __asm__ __volatile__( \
+ "1: " insn "\n" \
+ _EMIT_BUG_ENTRY \
+ : : "i" (__FILE__), "i" (__LINE__), \
+ "i" (flags), \
+ "i" (sizeof(struct bug_entry)), \
+ ##__VA_ARGS__)
+
 /*
  * BUG_ON() and WARN_ON() do their best to cooperate with compile-time
  * optimisations.
However depending on the complexity of the condition @@ -56,11 +65,7 @@ */ #define BUG() do { \ - __asm__ __volatile__( \ - "1: twi 31,0,0\n" \ - _EMIT_BUG_ENTRY \ - : : "i" (__FILE__), "i" (__LINE__), \ - "i" (0), "i" (sizeof(struct bug_entry))); \ + BUG_ENTRY("twi 31, 0, 0", 0); \ unreachable(); \ } while (0) @@ -69,23 +74,11 @@ if (x) \ BUG(); \ } else { \ - __asm__ __volatile__( \ - "1: "PPC_TLNEI" %4,0\n" \ - _EMIT_BUG_ENTRY \ - : : "i" (__FILE__), "i" (__LINE__), "i" (0), \ - "i" (sizeof(struct bug_entry)), \ - "r" ((__force long)(x))); \ + BUG_ENTRY(PPC_TLNEI " %4, 0", 0, "r" ((__force long)(x))); \ } \ } while (0) -#define __WARN_FLAGS(flags) do { \ - __asm__ __volatile__( \ - "1: twi 31,0,0\n" \ - _EMIT_BUG_ENTRY \ - : : "i" (__FILE__), "i" (__LINE__), \ - "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry))); \ -} while (0) +#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags)) #define WARN_ON(x) ({ \ int __ret_warn_on = !!(x); \ @@ -93,13 +86,9 @@ if (__ret_warn_on) \ __WARN(); \ } else { \ - __asm__ __volatile__( \ - "1: "PPC_TLNEI" %4,0\n" \ - _EMIT_BUG_ENTRY \ - : : "i" (__FILE__), "i" (__LINE__), \ - "i" (BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN)),\ - "i" (sizeof(struct bug_entry)), \ - "r" (__ret_warn_on)); \ + BUG_ENTRY(PPC_TLNEI " %4, 0", \ + BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \ + "r" (__ret_warn_on)); \ } \ unlikely(__ret_warn_on); \ }) From c4028fa2daa059ac9231ab3a4f57cbae814b3625 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 21 Aug 2019 10:13:32 +0000 Subject: [PATCH 119/144] powerpc/mm: drop #ifdef CONFIG_MMU in is_ioremap_addr() powerpc always selects CONFIG_MMU and CONFIG_MMU is not checked anywhere else in powerpc code. Drop the #ifdef and the alternative part of is_ioremap_addr() Fixes: 9bd3bb6703d8 ("mm/nvdimm: add is_ioremap_addr and use that to check ioremap address") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/de395e444fb8dd7a6365c3314d78e15ebb3d7d1b.1566382245.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/pgtable.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 4053b2ab427cc5..0e4ec8cc37b7b6 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -157,13 +157,9 @@ static inline bool pgd_is_leaf(pgd_t pgd) #define is_ioremap_addr is_ioremap_addr static inline bool is_ioremap_addr(const void *x) { -#ifdef CONFIG_MMU unsigned long addr = (unsigned long)x; return addr >= IOREMAP_BASE && addr < IOREMAP_END; -#else - return false; -#endif } #endif /* CONFIG_PPC64 */ From 46ddcb3950a28c0df4815e8dbb8d4b91d5d9f22d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 21 Aug 2019 15:21:55 +0000 Subject: [PATCH 120/144] powerpc/mm: Show if a bad page fault on data is read or write. DSISR (or ESR on some CPUs) has a bit to tell if the fault is due to a read or a write. Display it. 
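For context, the helper used by the hunk that follows is essentially a
single bit test on DSISR; a sketch of the non-booke form (booke parts
key off the equivalent ESR bit instead):

    /* DSISR bit set when the faulting access was a store */
    #define DSISR_ISSTORE	0x02000000

    static inline bool page_fault_is_write(unsigned long err)
    {
    	return err & DSISR_ISSTORE;
    }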
Signed-off-by: Christophe Leroy
Reviewed-by: Santosh Sivaraj
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/4f88d7e6fda53b5f80a71040ab400242f6c8cb93.1566400889.git.christophe.leroy@c-s.fr
---
 arch/powerpc/mm/fault.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 8432c281de9214..b5047f9b5dec4d 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -645,6 +645,7 @@ NOKPROBE_SYMBOL(do_page_fault);
 void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 {
 const struct exception_table_entry *entry;
+ int is_write = page_fault_is_write(regs->dsisr);

 /* Are we prepared to handle this fault? */
 if ((entry = search_exception_tables(regs->nip)) != NULL) {
@@ -658,9 +659,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 case 0x300:
 case 0x380:
 case 0xe00:
- pr_alert("BUG: %s at 0x%08lx\n",
+ pr_alert("BUG: %s on %s at 0x%08lx\n",
 regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
- "Unable to handle kernel data access", regs->dar);
+ "Unable to handle kernel data access",
+ is_write ? "write" : "read", regs->dar);
 break;
 case 0x400:
 case 0x480:

From a2227a27774328507a5c2335a6dd600c079d1ff5 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Fri, 23 Aug 2019 09:56:21 +0000
Subject: [PATCH 121/144] powerpc/32: Don't populate page tables for block
 mapped pages except on the 8xx.

Commit d2f15e0979ee ("powerpc/32: always populate page tables for
Abatron BDI.") wrongly sets page tables for any PPC32 for using the
BDI, and doesn't update them after init (remove RX on the init section,
set text and rodata read-only).

Only the 8xx requires page tables to be populated for using the BDI.
They also need to be populated in order to see the mappings in
/sys/kernel/debug/kernel_page_tables

On BOOK3S_32, pages that are not mapped by page tables are mapped by
BATs. The BDI knows about BATs, and they can be viewed in
/sys/kernel/debug/powerpc/block_address_translation

Only set page tables for RAM and IMMR on the 8xx and properly update
them at the end of init.
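On BOOK3S_32 the "is this address covered by a BAT" check is
conceptually just a range test against the recorded BAT array; a
simplified sketch of the real helper (v_block_mapped() and bat_addrs[]
in book3s32/mmu.c):

    /* Return true if va falls inside any valid BAT mapping (sketch) */
    static bool va_is_block_mapped(unsigned long va)
    {
    	int b;

    	for (b = 0; b < 8; b++)
    		if (bat_addrs[b].start <= va && va < bat_addrs[b].limit)
    			return true;
    	return false;
    }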
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c8610942203e0d93fcb02ad20c57edd3adb4c9d3.1566554029.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/nohash/8xx.c | 52 +++++++++++++++++++++++++++++++++--- arch/powerpc/mm/pgtable_32.c | 5 +--- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 4a06cb342da2e2..090af2d2d3e4c5 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -103,6 +103,19 @@ static void mmu_patch_addis(s32 *site, long simm) patch_instruction_site(site, instr); } +void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot) +{ + unsigned long s = offset; + unsigned long v = PAGE_OFFSET + s; + phys_addr_t p = memstart_addr + s; + + for (; s < top; s += PAGE_SIZE) { + map_kernel_page(v, p, prot); + v += PAGE_SIZE; + p += PAGE_SIZE; + } +} + unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long mapped; @@ -115,10 +128,20 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0); } else { + unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, - _ALIGN(__pa(_einittext), 8 << 20)); + mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, einittext8); + + /* + * Populate page tables to: + * - have them appear in /sys/kernel/debug/kernel_page_tables + * - allow the BDI to find the pages when they are not PINNED + */ + mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X); + mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL); + mmu_mapin_immr(); } mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped); @@ -144,18 +167,41 @@ void mmu_mark_initmem_nx(void) if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23) mmu_patch_addis(&patch__itlbmiss_linmem_top8, -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1))); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) { + unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); + unsigned long etext = __pa(_etext); + mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext)); + + /* Update page tables for PTDUMP and BDI */ + mmu_mapin_ram_chunk(0, einittext8, __pgprot(0)); + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { + mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_TEXT); + mmu_mapin_ram_chunk(etext, einittext8, PAGE_KERNEL); + } else { + mmu_mapin_ram_chunk(0, etext8, PAGE_KERNEL_TEXT); + mmu_mapin_ram_chunk(etext8, einittext8, PAGE_KERNEL); + } + } } #ifdef CONFIG_STRICT_KERNEL_RWX void mmu_mark_rodata_ro(void) { + unsigned long sinittext = __pa(_sinittext); + unsigned long etext = __pa(_etext); + if (CONFIG_DATA_SHIFT < 23) mmu_patch_addis(&patch__dtlbmiss_romem_top8, -__pa(((unsigned long)_sinittext) & ~(LARGE_PAGE_SIZE_8M - 1))); mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext)); + + /* Update page tables for PTDUMP and BDI */ + mmu_mapin_ram_chunk(0, sinittext, __pgprot(0)); + mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_ROX); + mmu_mapin_ram_chunk(etext, sinittext, PAGE_KERNEL_RO); } #endif diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 8ec5dfb65b2eb1..73b84166d06a69 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -117,10 +117,7 
@@ void __init mapin_ram(void)
 if (base >= top)
 continue;
 base = mmu_mapin_ram(base, top);
- if (IS_ENABLED(CONFIG_BDI_SWITCH))
- __mapin_ram_chunk(reg->base, top);
- else
- __mapin_ram_chunk(base, top);
+ __mapin_ram_chunk(base, top);
 }
 }

From b06174345f6e70200916136695514e0b6b95ac17 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Wed, 28 Aug 2019 13:42:01 +0000
Subject: [PATCH 122/144] powerpc/reg: use ASM_FTR_IFSET() instead of
 opencoding fixup.

mftb() includes a feature fixup for CELL ppc.

Use the ASM_FTR_IFSET() macro instead of open-coding the setup of the
fixup sections.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/ac19713826fa55e9e7bfe3100c5a7b1712ab9526.1566999711.git.christophe.leroy@c-s.fr
---
 arch/powerpc/include/asm/reg.h | 16 +++------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b3cbb1136bce08..f3f368819f9c0f 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1384,19 +1384,9 @@ static inline void msr_check_and_clear(unsigned long bits)
 #define mftb() ({unsigned long rval; \
 asm volatile( \
 "90: mfspr %0, %2;\n" \
- "97: cmpwi %0,0;\n" \
- " beq- 90b;\n" \
- "99:\n" \
- ".section __ftr_fixup,\"a\"\n" \
- ".align 3\n" \
- "98:\n" \
- " .8byte %1\n" \
- " .8byte %1\n" \
- " .8byte 97b-98b\n" \
- " .8byte 99b-98b\n" \
- " .8byte 0\n" \
- " .8byte 0\n" \
- ".previous" \
+ ASM_FTR_IFSET( \
+ "97: cmpwi %0,0;\n" \
+ " beq- 90b;\n", "", %1) \
 : "=r" (rval) \
 : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \
 rval;})

From 44448640dd0df98891c5ea4695d89a4972cb4c1f Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Thu, 29 Aug 2019 08:45:12 +0000
Subject: [PATCH 123/144] powerpc: permanently include 8xx registers in reg.h

Most 8xx registers have specific names, so just include reg_8xx.h all
the time in reg.h in order to have them defined even when
CONFIG_PPC_8xx is not selected. This avoids the need for #ifdefs in C
code.

Guard SPRN_ICTRL in an #ifdef CONFIG_PPC_8xx, as this register has the
same name but a different meaning and a different SPR number than
another register on the mpc7450.
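The point of making the SPRs visible on every build is that
run-time-dead branches can then replace preprocessor blocks; a sketch of
the pattern (this is essentially what the hw_irq.h cleanup in the next
patch does):

    static inline void example_irq_disable(void)
    {
    	if (IS_ENABLED(CONFIG_PPC_8xx))
    		wrtspr(SPRN_EID);	/* needs SPRN_EID defined everywhere */
    	else
    		mtmsr(mfmsr() & ~MSR_EE);
    }

Both branches must now compile on every platform; the one that cannot be
taken is discarded by the compiler.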
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/dd82934ad91aab607d0eb7e626c14e6ac0d654eb.1567068137.git.christophe.leroy@c-s.fr
---
 arch/powerpc/include/asm/nohash/32/kup-8xx.h | 1 +
 arch/powerpc/include/asm/reg.h | 2 --
 arch/powerpc/include/asm/reg_8xx.h | 4 ++--
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 1c3133b5f86aa3..1006a427e99cce 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -3,6 +3,7 @@
 #define _ASM_POWERPC_KUP_8XX_H_

 #include
+#include

 #ifdef CONFIG_PPC_KUAP

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index f3f368819f9c0f..b7faf6c781a730 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -25,9 +25,7 @@
 #include
 #endif

-#ifdef CONFIG_PPC_8xx
 #include
-#endif /* CONFIG_PPC_8xx */

 #define MSR_SF_LG 63 /* Enable 64 bit mode */
 #define MSR_ISF_LG 61 /* Interrupt 64b mode valid on 630 */

diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h
index 7192eece6c3e1f..07df35ee8cbcfe 100644
--- a/arch/powerpc/include/asm/reg_8xx.h
+++ b/arch/powerpc/include/asm/reg_8xx.h
@@ -5,8 +5,6 @@
 #ifndef _ASM_POWERPC_REG_8xx_H
 #define _ASM_POWERPC_REG_8xx_H

-#include
-
 /* Cache control on the MPC8xx is provided through some additional
  * special purpose registers. */
@@ -38,7 +36,9 @@
 #define SPRN_CMPF 153
 #define SPRN_LCTRL1 156
 #define SPRN_LCTRL2 157
+#ifdef CONFIG_PPC_8xx
 #define SPRN_ICTRL 158
+#endif
 #define SPRN_BAR 159

 /* Commands. Only the first few are available to the instruction cache.

From b020aa9d1e875c1c91b1390acdf42320e7060d84 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Thu, 29 Aug 2019 08:45:13 +0000
Subject: [PATCH 124/144] powerpc: cleanup hw_irq.h

SET_MSR_EE() is only used in this file and doesn't provide any added
value compared to mtmsr(). Drop it.

Add a wrtee() inline function to use the wrtee/wrteei instructions.
Replace #ifdefs by IS_ENABLED() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a28a20514d5f6df9629c1a117b667e48c4272736.1567068137.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/hw_irq.h | 57 ++++++++++++++----------------- arch/powerpc/include/asm/reg.h | 8 +++++ 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 32a18f2f49bc80..e3a905e3d5736e 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -226,8 +226,8 @@ static inline bool arch_irqs_disabled(void) #endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_BOOK3E -#define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory") -#define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory") +#define __hard_irq_enable() wrtee(MSR_EE) +#define __hard_irq_disable() wrtee(0) #else #define __hard_irq_enable() __mtmsrd(MSR_EE|MSR_RI, 1) #define __hard_irq_disable() __mtmsrd(MSR_RI, 1) @@ -280,8 +280,6 @@ extern void force_external_irq_replay(void); #else /* CONFIG_PPC64 */ -#define SET_MSR_EE(x) mtmsr(x) - static inline unsigned long arch_local_save_flags(void) { return mfmsr(); @@ -289,47 +287,44 @@ static inline unsigned long arch_local_save_flags(void) static inline void arch_local_irq_restore(unsigned long flags) { -#if defined(CONFIG_BOOKE) - asm volatile("wrtee %0" : : "r" (flags) : "memory"); -#else - mtmsr(flags); -#endif + if (IS_ENABLED(CONFIG_BOOKE)) + wrtee(flags); + else + mtmsr(flags); } static inline unsigned long arch_local_irq_save(void) { unsigned long flags = arch_local_save_flags(); -#ifdef CONFIG_BOOKE - asm volatile("wrteei 0" : : : "memory"); -#elif defined(CONFIG_PPC_8xx) - wrtspr(SPRN_EID); -#else - SET_MSR_EE(flags & ~MSR_EE); -#endif + + if (IS_ENABLED(CONFIG_BOOKE)) + wrtee(0); + else if (IS_ENABLED(CONFIG_PPC_8xx)) + wrtspr(SPRN_EID); + else + mtmsr(flags & ~MSR_EE); + return flags; } static inline void arch_local_irq_disable(void) { -#ifdef CONFIG_BOOKE - asm volatile("wrteei 0" : : : "memory"); -#elif defined(CONFIG_PPC_8xx) - wrtspr(SPRN_EID); -#else - arch_local_irq_save(); -#endif + if (IS_ENABLED(CONFIG_BOOKE)) + wrtee(0); + else if (IS_ENABLED(CONFIG_PPC_8xx)) + wrtspr(SPRN_EID); + else + mtmsr(mfmsr() & ~MSR_EE); } static inline void arch_local_irq_enable(void) { -#ifdef CONFIG_BOOKE - asm volatile("wrteei 1" : : : "memory"); -#elif defined(CONFIG_PPC_8xx) - wrtspr(SPRN_EIE); -#else - unsigned long msr = mfmsr(); - SET_MSR_EE(msr | MSR_EE); -#endif + if (IS_ENABLED(CONFIG_BOOKE)) + wrtee(MSR_EE); + else if (IS_ENABLED(CONFIG_PPC_8xx)) + wrtspr(SPRN_EIE); + else + mtmsr(mfmsr() | MSR_EE); } static inline bool arch_irqs_disabled_flags(unsigned long flags) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b7faf6c781a730..0b7900f194c8f7 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1368,6 +1368,14 @@ static inline void mtmsr_isync(unsigned long val) #define wrtspr(rn) asm volatile("mtspr " __stringify(rn) ",0" : \ : : "memory") +static inline void wrtee(unsigned long val) +{ + if (__builtin_constant_p(val)) + asm volatile("wrteei %0" : : "i" ((val & MSR_EE) ? 
1 : 0) : "memory");
+ else
+ asm volatile("wrtee %0" : : "r" (val) : "memory");
+}
+
 extern unsigned long msr_check_and_set(unsigned long bits);
 extern bool strict_msr_control;
 extern void __msr_check_and_clear(unsigned long bits);

From 132f92fdc42782fd297e076ef74bedeb8ce774e4 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Thu, 12 Sep 2019 13:22:55 +0000
Subject: [PATCH 125/144] powerpc/8xx: add __init to cpm1 init functions

Functions cpm1_clk_setup(), cpm1_set_pin(), cpm_pic_init() and
mpc8xx_pic_init() are only called from __init functions, so mark them
__init as well.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/c27168ef054f3a52edcf0ff91652700d53b3e32d.1568294563.git.christophe.leroy@c-s.fr
---
 arch/powerpc/platforms/8xx/cpm1.c | 10 +++++-----
 arch/powerpc/platforms/8xx/pic.c | 2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c
index 0f65c51271db9e..81a1a2eb116a49 100644
--- a/arch/powerpc/platforms/8xx/cpm1.c
+++ b/arch/powerpc/platforms/8xx/cpm1.c
@@ -130,7 +130,7 @@ static const struct irq_domain_ops cpm_pic_host_ops = {
 .map = cpm_pic_host_map,
 };

-unsigned int cpm_pic_init(void)
+unsigned int __init cpm_pic_init(void)
 {
 struct device_node *np = NULL;
 struct resource res;
@@ -306,7 +306,7 @@ struct cpm_ioport32e {
 __be32 dir, par, sor, odr, dat;
 };

-static void cpm1_set_pin32(int port, int pin, int flags)
+static void __init cpm1_set_pin32(int port, int pin, int flags)
 {
 struct cpm_ioport32e __iomem *iop;
 pin = 1 << (31 - pin);
@@ -348,7 +348,7 @@ static void cpm1_set_pin32(int port, int pin, int flags)
 }
 }

-static void cpm1_set_pin16(int port, int pin, int flags)
+static void __init cpm1_set_pin16(int port, int pin, int flags)
 {
 struct cpm_ioport16 __iomem *iop =
 (struct cpm_ioport16 __iomem *)&mpc8xx_immr->im_ioport;
@@ -386,7 +386,7 @@ static void cpm1_set_pin16(int port, int pin, int flags)
 }
 }

-void cpm1_set_pin(enum cpm_port port, int pin, int flags)
+void __init cpm1_set_pin(enum cpm_port port, int pin, int flags)
 {
 if (port == CPM_PORTB || port == CPM_PORTE)
 cpm1_set_pin32(port, pin, flags);
@@ -394,7 +394,7 @@ void cpm1_set_pin(enum cpm_port port, int pin, int flags)
 cpm1_set_pin16(port, pin, flags);
 }

-int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode)
+int __init cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode)
 {
 int shift;
 int i, bits = 0;

diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index e9617d35fd1f6c..f2ba837249d694 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -125,7 +125,7 @@ static const struct irq_domain_ops mpc8xx_pic_host_ops = {
 .xlate = mpc8xx_pic_host_xlate,
 };

-int mpc8xx_pic_init(void)
+int __init mpc8xx_pic_init(void)
 {
 struct resource res;
 struct device_node *np;

From eafd687e689acd99d780e468d6a0622f4694d0bc Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Thu, 12 Sep 2019 13:29:07 +0000
Subject: [PATCH 126/144] powerpc/8xx: use the fixmapped IMMR in cpm_reset()

Since commit f86ef74ed919 ("powerpc/8xx: Fix vaddr for IMMR early
remap"), the IMMR area has been mapped at startup with fixmap.

Use that fixmap directly instead of calling ioremap(); this avoids
calling ioremap() early, before the slab is available.
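The distinction, sketched: ioremap() has to allocate a vmalloc area and
therefore needs the slab up, while a fixmap slot is a fixed,
compile-time virtual address that only needs a page-table entry, so it
is usable from very early boot. Illustrative only:

    /* Early MMIO read through the fixmapped IMMR window (sketch).
     * VIRT_IMMR_BASE is a constant mapped at startup, no allocation. */
    static u32 example_early_immr_read(unsigned long offset)
    {
    	void __iomem *immr = (void __iomem *)VIRT_IMMR_BASE;

    	return in_be32(immr + offset);
    }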
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f816ccdbd15b97cf43c5a8c7cc8dfa8db58ff036.1568294935.git.christophe.leroy@c-s.fr --- arch/powerpc/platforms/8xx/cpm1.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index 81a1a2eb116a49..a43ee7d1ff856a 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -51,7 +51,7 @@ #define CPM_MAP_SIZE (0x4000) cpm8xx_t __iomem *cpmp; /* Pointer to comm processor space */ -immap_t __iomem *mpc8xx_immr; +immap_t __iomem *mpc8xx_immr = (void __iomem *)VIRT_IMMR_BASE; static cpic8xx_t __iomem *cpic_reg; static struct irq_domain *cpm_pic_host; @@ -201,12 +201,6 @@ void __init cpm_reset(void) { sysconf8xx_t __iomem *siu_conf; - mpc8xx_immr = ioremap(get_immrbase(), 0x4000); - if (!mpc8xx_immr) { - printk(KERN_CRIT "Could not map IMMR\n"); - return; - } - cpmp = &mpc8xx_immr->im_cpm; #ifndef CONFIG_PPC_EARLY_DEBUG_CPM From 77693a5fb57be4606a6024ec8e3076f9499b906b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 12 Sep 2019 13:49:42 +0000 Subject: [PATCH 127/144] powerpc/fixmap: Use __fix_to_virt() instead of fix_to_virt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify back __set_fixmap() to using __fix_to_virt() instead of fix_to_virt() otherwise the following happens because it seems GCC doesn't see idx as a builtin const. CC mm/early_ioremap.o In file included from ./include/linux/kernel.h:11:0, from mm/early_ioremap.c:11: In function ‘fix_to_virt’, inlined from ‘__set_fixmap’ at ./arch/powerpc/include/asm/fixmap.h:87:2, inlined from ‘__early_ioremap’ at mm/early_ioremap.c:156:4: ./include/linux/compiler.h:350:38: error: call to ‘__compiletime_assert_32’ declared with attribute error: BUILD_BUG_ON failed: idx >= __end_of_fixed_addresses _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) ^ ./include/linux/compiler.h:331:4: note: in definition of macro ‘__compiletime_assert’ prefix ## suffix(); \ ^ ./include/linux/compiler.h:350:2: note: in expansion of macro ‘_compiletime_assert’ _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) ^ ./include/linux/build_bug.h:39:37: note: in expansion of macro ‘compiletime_assert’ #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) ^ ./include/linux/build_bug.h:50:2: note: in expansion of macro ‘BUILD_BUG_ON_MSG’ BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition) ^ ./include/asm-generic/fixmap.h:32:2: note: in expansion of macro ‘BUILD_BUG_ON’ BUILD_BUG_ON(idx >= __end_of_fixed_addresses); ^ Signed-off-by: Christophe Leroy Fixes: 4cfac2f9c7f1 ("powerpc/mm: Simplify __set_fixmap()") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f4984c615f90caa3277775a68849afeea846850d.1568295907.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/fixmap.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 0cfc365d814bae..722289a1d000e9 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -77,7 +77,12 @@ enum fixed_addresses { static inline void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { - map_kernel_page(fix_to_virt(idx), phys, flags); + if (__builtin_constant_p(idx)) + BUILD_BUG_ON(idx >= __end_of_fixed_addresses); + else if (WARN_ON(idx >= 
__end_of_fixed_addresses)) + return; + + map_kernel_page(__fix_to_virt(idx), phys, flags); } #endif /* !__ASSEMBLY__ */ From 265c3491c4bc8d40587996d6ee2f447a7ccfb4f3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 12 Sep 2019 13:49:43 +0000 Subject: [PATCH 128/144] powerpc: Add support for GENERIC_EARLY_IOREMAP Add support for GENERIC_EARLY_IOREMAP. Let's define 16 slots of 256Kbytes each for early ioremap. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/412c7eaa6a373d8f82a3c3ee01e6a65a1a6589de.1568295907.git.christophe.leroy@c-s.fr --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/fixmap.h | 12 ++++++++++++ arch/powerpc/kernel/setup_32.c | 3 +++ arch/powerpc/kernel/setup_64.c | 3 +++ 5 files changed, 20 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 09dddb849671de..e446bb5b3f8d77 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -161,6 +161,7 @@ config PPC select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC + select GENERIC_EARLY_IOREMAP select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP if PCI diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 8dbf85d5da0e22..148bee20e7e224 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -12,3 +12,4 @@ generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += vtime.h generic-y += msi.h +generic-y += early_ioremap.h diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 722289a1d000e9..d5c4d357bd33e8 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -15,6 +15,7 @@ #define _ASM_FIXMAP_H #ifndef __ASSEMBLY__ +#include #include #include #ifdef CONFIG_HIGHMEM @@ -64,6 +65,14 @@ enum fixed_addresses { FIX_IMMR_SIZE, #endif /* FIX_PCIE_MCFG, */ + __end_of_permanent_fixed_addresses, + +#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE) +#define FIX_BTMAPS_SLOTS 16 +#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + + FIX_BTMAP_END = __end_of_permanent_fixed_addresses, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, __end_of_fixed_addresses }; @@ -71,6 +80,7 @@ enum fixed_addresses { #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) #define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NCG +#define FIXMAP_PAGE_IO PAGE_KERNEL_NCG #include @@ -85,5 +95,7 @@ static inline void __set_fixmap(enum fixed_addresses idx, map_kernel_page(__fix_to_virt(idx), phys, flags); } +#define __early_set_fixmap __set_fixmap + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index a7541edf0cdb6f..dcffe927f5b93e 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "setup.h" @@ -80,6 +81,8 @@ notrace void __init machine_init(u64 dt_ptr) /* Configure static keys first, now that we're relocated. 
*/ setup_feature_keys(); + early_ioremap_setup(); + /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d2af4c228970e6..6104917a282d6c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -65,6 +65,7 @@ #include #include #include +#include #include "setup.h" @@ -332,6 +333,8 @@ void __init early_setup(unsigned long dt_ptr) apply_feature_fixups(); setup_feature_keys(); + early_ioremap_setup(); + /* Initialize the hash table or TLB handling */ early_init_mmu(); From d538aadc2718a95bfd80095c66ea814824535b34 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 12 Sep 2019 13:49:44 +0000 Subject: [PATCH 129/144] powerpc/ioremap: warn on early use of ioremap() Powerpc now has EARLY_IOREMAP. Next step is to convert all early users of ioremap() to early_ioremap(). Add a warning to help locate those users. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b4f03a68ee8e68773c8973d74ec35f9c82c72871.1568295907.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/ioremap_32.c | 1 + arch/powerpc/mm/ioremap_64.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c index f36121f25243f6..743e11384deab8 100644 --- a/arch/powerpc/mm/ioremap_32.c +++ b/arch/powerpc/mm/ioremap_32.c @@ -68,6 +68,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call /* * Should check if it is a candidate for a BAT mapping */ + pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller); err = early_ioremap_range(ioremap_bot - size, p, size, prot); if (err) diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c index fd29e51700cd0e..50a99d9684f7d8 100644 --- a/arch/powerpc/mm/ioremap_64.c +++ b/arch/powerpc/mm/ioremap_64.c @@ -81,6 +81,8 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size, if (slab_is_available()) return do_ioremap(paligned, offset, size, prot, caller); + pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller); + err = early_ioremap_range(ioremap_bot, paligned, size, prot); if (err) return NULL; From cbcaff7d27ad5c5d2c2db113ec489be88adb815a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 16 Sep 2019 20:25:39 +0000 Subject: [PATCH 130/144] powerpc/32s: automatically allocate BAT in setbat() If no BAT is given to setbat(), select an available BAT. 
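Usage sketch of the new behaviour (hypothetical caller; the 83xx patch
later in this series uses it in exactly this way):

    static void __init example_map_io_window(unsigned long va, phys_addr_t pa)
    {
    	/* index -1: let setbat() pick whichever BAT is free */
    	setbat(-1, va, pa, SZ_2M, PAGE_KERNEL_NCG);
    	update_bats();	/* commit the new BAT to the hardware */
    }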
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/a212bd36fbd6179e0929b6c727febc35132ac25c.1568665466.git.christophe.leroy@c-s.fr
---
 arch/powerpc/mm/book3s32/mmu.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 84d5fab94f8f82..69b2419accef46 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -251,9 +251,18 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
 {
 unsigned int bl;
 int wimgxpp;
- struct ppc_bat *bat = BATS[index];
+ struct ppc_bat *bat;
 unsigned long flags = pgprot_val(prot);

+ if (index == -1)
+ index = find_free_bat();
+ if (index == -1) {
+ pr_err("%s: no BAT available for mapping 0x%llx\n", __func__,
+ (unsigned long long)phys);
+ return;
+ }
+ bat = BATS[index];
+
 if ((flags & _PAGE_NO_CACHE) ||
 (cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0))
 flags &= ~_PAGE_COHERENT;

From 6b7c095a51e1bad8b27fa1bc8753af0b613464a3 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Mon, 16 Sep 2019 20:25:41 +0000
Subject: [PATCH 131/144] powerpc/83xx: map IMMR with a BAT.

On mpc83xx with a QE, IMMR is 2Mbytes and aligned on a 2Mbytes
boundary. On mpc83xx without a QE, IMMR is 1Mbyte and 1Mbyte aligned.

Each driver will map a part of it to access the registers it needs.
Some drivers will map the same part of IMMR as other drivers.

In order to reduce TLB misses, map the full IMMR with a BAT. If it is
2Mbytes aligned, map 2Mbytes. If there is no QE, the upper part will
remain unused, but it does no harm as it is mapped as guarded memory.

When the IMMR is not aligned on a 2Mbytes boundary, only map 1Mbyte.

Signed-off-by: Christophe Leroy
Acked-by: Scott Wood
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/269a00951328fb6fa1be2fa3cbc76c19745019b7.1568665466.git.christophe.leroy@c-s.fr
---
 arch/powerpc/include/asm/fixmap.h | 7 +++++++
 arch/powerpc/platforms/83xx/misc.c | 11 +++++++++++
 2 files changed, 18 insertions(+)

diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index d5c4d357bd33e8..2ef155a3c8214d 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -63,6 +63,13 @@ enum fixed_addresses {
 FIX_IMMR_START,
 FIX_IMMR_BASE = __ALIGN_MASK(FIX_IMMR_START, FIX_IMMR_SIZE - 1) - 1 +
 FIX_IMMR_SIZE,
 #endif
+#ifdef CONFIG_PPC_83xx
+ /* For IMMR we need an aligned 2M area */
+#define FIX_IMMR_SIZE (SZ_2M / PAGE_SIZE)
+ FIX_IMMR_START,
+ FIX_IMMR_BASE = __ALIGN_MASK(FIX_IMMR_START, FIX_IMMR_SIZE - 1) - 1 +
+ FIX_IMMR_SIZE,
+#endif
 /* FIX_PCIE_MCFG, */
 __end_of_permanent_fixed_addresses,

diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c
index f46d7bf3b14019..6399865a625e52 100644
--- a/arch/powerpc/platforms/83xx/misc.c
+++ b/arch/powerpc/platforms/83xx/misc.c
@@ -18,6 +18,8 @@
 #include
 #include

+#include
+
 #include "mpc83xx.h"

 static __be32 __iomem *restart_reg_base;

@@ -145,6 +147,15 @@ void __init mpc83xx_setup_arch(void)
 if (ppc_md.progress)
 ppc_md.progress("mpc83xx_setup_arch()", 0);

+ if (!__map_without_bats) {
+ phys_addr_t immrbase = get_immrbase();
+ int immrsize = IS_ALIGNED(immrbase, SZ_2M) ?
SZ_2M : SZ_1M; + unsigned long va = fix_to_virt(FIX_IMMR_BASE); + + setbat(-1, va, immrbase, immrsize, PAGE_KERNEL_NCG); + update_bats(); + } + mpc83xx_setup_pci(); } From 8795a739e5c72abeec51caf36b6df2b37e5720c5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 31 Oct 2019 13:47:30 +0000 Subject: [PATCH 132/144] powerpc/sysdev: drop simple gpio There is a config item CONFIG_SIMPLE_GPIO which provides simple memory mapped GPIOs specific to powerpc. However, the only platform which selects this option is mpc5200, and this platform doesn't use it. There are three boards calling simple_gpiochip_init(), but as they don't select CONFIG_SIMPLE_GPIO, this is just a nop. Simple_gpio is just redundant with the generic MMIO GPIO driver which can be found in driver/gpio/ and selected via CONFIG_GPIO_GENERIC_PLATFORM, so drop simple_gpio driver. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bf930402613b41b42d0441b784e0cc43fc18d1fb.1572529632.git.christophe.leroy@c-s.fr --- .../devicetree/bindings/board/fsl-board.txt | 30 ---- arch/powerpc/configs/mpc5200_defconfig | 1 - arch/powerpc/platforms/83xx/mpc836x_mds.c | 7 - arch/powerpc/platforms/85xx/mpc85xx_mds.c | 6 - arch/powerpc/platforms/86xx/mpc8610_hpcd.c | 4 - arch/powerpc/platforms/Kconfig | 10 -- arch/powerpc/sysdev/Makefile | 1 - arch/powerpc/sysdev/simple_gpio.c | 143 ------------------ arch/powerpc/sysdev/simple_gpio.h | 13 -- 9 files changed, 215 deletions(-) delete mode 100644 arch/powerpc/sysdev/simple_gpio.c delete mode 100644 arch/powerpc/sysdev/simple_gpio.h diff --git a/Documentation/devicetree/bindings/board/fsl-board.txt b/Documentation/devicetree/bindings/board/fsl-board.txt index eb52f6b35159d2..9cde5701592193 100644 --- a/Documentation/devicetree/bindings/board/fsl-board.txt +++ b/Documentation/devicetree/bindings/board/fsl-board.txt @@ -47,36 +47,6 @@ Example (LS2080A-RDB): reg = <0x3 0 0x10000>; }; -* Freescale BCSR GPIO banks - -Some BCSR registers act as simple GPIO controllers, each such -register can be represented by the gpio-controller node. - -Required properities: -- compatible : Should be "fsl,-bcsr-gpio". -- reg : Should contain the address and the length of the GPIO bank - register. -- #gpio-cells : Should be two. The first cell is the pin number and the - second cell is used to specify optional parameters (currently unused). -- gpio-controller : Marks the port as GPIO controller. 
- -Example: - - bcsr@1,0 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,mpc8360mds-bcsr"; - reg = <1 0 0x8000>; - ranges = <0 1 0 0x8000>; - - bcsr13: gpio-controller@d { - #gpio-cells = <2>; - compatible = "fsl,mpc8360mds-bcsr-gpio"; - reg = <0xd 1>; - gpio-controller; - }; - }; - * Freescale on-board FPGA connected on I2C bus Some Freescale boards like BSC9132QDS have on board FPGA connected on diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig index 6f87a5c7496029..83d801307178d7 100644 --- a/arch/powerpc/configs/mpc5200_defconfig +++ b/arch/powerpc/configs/mpc5200_defconfig @@ -15,7 +15,6 @@ CONFIG_PPC_MEDIA5200=y CONFIG_PPC_MPC5200_BUGFIX=y CONFIG_PPC_MPC5200_LPBFIFO=m # CONFIG_PPC_PMAC is not set -CONFIG_SIMPLE_GPIO=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c index 4a4efa906d356d..240a26d88b07d1 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include @@ -181,12 +180,6 @@ static int __init mpc836x_usb_cfg(void) qe_usb_clock_set(QE_CLK21, 48000000); } else { setbits8(&bcsr[13], BCSR13_USBMODE); - /* - * The BCSR GPIOs are used to control power and - * speed of the USB transceiver. This is needed for - * the USB Host only. - */ - simple_gpiochip_init("fsl,mpc8360mds-bcsr-gpio"); } of_node_put(np); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index 120633f99ea6da..381a6ac8cb4bfb 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -350,11 +349,6 @@ machine_arch_initcall(mpc8569_mds, board_fixups); static int __init mpc85xx_publish_devices(void) { - if (machine_is(mpc8568_mds)) - simple_gpiochip_init("fsl,mpc8568mds-bcsr-gpio"); - if (machine_is(mpc8569_mds)) - simple_gpiochip_init("fsl,mpc8569mds-bcsr-gpio"); - return mpc85xx_common_publish_devices(); } diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c index 96b27f6fdd0f78..7733d0607da2e3 100644 --- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c +++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c @@ -34,7 +34,6 @@ #include #include #include -#include #include "mpc86xx.h" @@ -93,9 +92,6 @@ static const struct of_device_id mpc8610_ids[] __initconst = { static int __init mpc8610_declare_of_platform_devices(void) { - /* Firstly, register PIXIS GPIOs. */ - simple_gpiochip_init("fsl,fpga-pixis-gpio-bank"); - /* Enable wakeup on PIXIS' event IRQ. */ mpc8610_suspend_init(); diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index d82e3664ffdf86..e28df298df5635 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -303,16 +303,6 @@ config GEN_RTC replacing their get_rtc_time/set_rtc_time callbacks with a proper RTC device driver. -config SIMPLE_GPIO - bool "Support for simple, memory-mapped GPIO controllers" - depends on PPC - select GPIOLIB - help - Say Y here to support simple, memory-mapped GPIO controllers. - These are usually BCSRs used to control board's switches, LEDs, - chip-selects, Ethernet/USB PHY's power and various other small - on-board peripherals. 
- config MCU_MPC8349EMITX bool "MPC8349E-mITX MCU driver" depends on I2C=y && PPC_83xx diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 603b3c656d191e..cb5a5bd2cef548 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -24,7 +24,6 @@ obj-$(CONFIG_FSL_CORENET_RCPM) += fsl_rcpm.o obj-$(CONFIG_FSL_LBC) += fsl_lbc.o obj-$(CONFIG_FSL_GTM) += fsl_gtm.o obj-$(CONFIG_FSL_85XX_CACHE_SRAM) += fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o -obj-$(CONFIG_SIMPLE_GPIO) += simple_gpio.o obj-$(CONFIG_FSL_RIO) += fsl_rio.o fsl_rmu.o obj-$(CONFIG_TSI108_BRIDGE) += tsi108_pci.o tsi108_dev.o obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c deleted file mode 100644 index dc1740cd9e429f..00000000000000 --- a/arch/powerpc/sysdev/simple_gpio.c +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Simple Memory-Mapped GPIOs - * - * Copyright (c) MontaVista Software, Inc. 2008. - * - * Author: Anton Vorontsov - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "simple_gpio.h" - -struct u8_gpio_chip { - struct of_mm_gpio_chip mm_gc; - spinlock_t lock; - - /* shadowed data register to clear/set bits safely */ - u8 data; -}; - -static u8 u8_pin2mask(unsigned int pin) -{ - return 1 << (8 - 1 - pin); -} - -static int u8_gpio_get(struct gpio_chip *gc, unsigned int gpio) -{ - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - - return !!(in_8(mm_gc->regs) & u8_pin2mask(gpio)); -} - -static void u8_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) -{ - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct u8_gpio_chip *u8_gc = gpiochip_get_data(gc); - unsigned long flags; - - spin_lock_irqsave(&u8_gc->lock, flags); - - if (val) - u8_gc->data |= u8_pin2mask(gpio); - else - u8_gc->data &= ~u8_pin2mask(gpio); - - out_8(mm_gc->regs, u8_gc->data); - - spin_unlock_irqrestore(&u8_gc->lock, flags); -} - -static int u8_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio) -{ - return 0; -} - -static int u8_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) -{ - u8_gpio_set(gc, gpio, val); - return 0; -} - -static void u8_gpio_save_regs(struct of_mm_gpio_chip *mm_gc) -{ - struct u8_gpio_chip *u8_gc = - container_of(mm_gc, struct u8_gpio_chip, mm_gc); - - u8_gc->data = in_8(mm_gc->regs); -} - -static int __init u8_simple_gpiochip_add(struct device_node *np) -{ - int ret; - struct u8_gpio_chip *u8_gc; - struct of_mm_gpio_chip *mm_gc; - struct gpio_chip *gc; - - u8_gc = kzalloc(sizeof(*u8_gc), GFP_KERNEL); - if (!u8_gc) - return -ENOMEM; - - spin_lock_init(&u8_gc->lock); - - mm_gc = &u8_gc->mm_gc; - gc = &mm_gc->gc; - - mm_gc->save_regs = u8_gpio_save_regs; - gc->ngpio = 8; - gc->direction_input = u8_gpio_dir_in; - gc->direction_output = u8_gpio_dir_out; - gc->get = u8_gpio_get; - gc->set = u8_gpio_set; - - ret = of_mm_gpiochip_add_data(np, mm_gc, u8_gc); - if (ret) - goto err; - return 0; -err: - kfree(u8_gc); - return ret; -} - -void __init simple_gpiochip_init(const char *compatible) -{ - struct device_node *np; - - for_each_compatible_node(np, NULL, compatible) { - int ret; - struct resource r; - - ret = of_address_to_resource(np, 0, &r); - if (ret) - goto err; - - switch (resource_size(&r)) { - case 1: - ret = u8_simple_gpiochip_add(np); - if (ret) - goto err; - break; - default: - /* - * Whenever you need support for GPIO bank width > 1, - * please just turn 
u8_ code into huge macros, and - * construct needed uX_ code with it. - */ - ret = -ENOSYS; - goto err; - } - continue; -err: - pr_err("%pOF: registration failed, status %d\n", np, ret); - } -} diff --git a/arch/powerpc/sysdev/simple_gpio.h b/arch/powerpc/sysdev/simple_gpio.h deleted file mode 100644 index f3f3a20d39e27a..00000000000000 --- a/arch/powerpc/sysdev/simple_gpio.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __SYSDEV_SIMPLE_GPIO_H -#define __SYSDEV_SIMPLE_GPIO_H - -#include - -#ifdef CONFIG_SIMPLE_GPIO -extern void simple_gpiochip_init(const char *compatible); -#else -static inline void simple_gpiochip_init(const char *compatible) {} -#endif /* CONFIG_SIMPLE_GPIO */ - -#endif /* __SYSDEV_SIMPLE_GPIO_H */ From 9f7bd9201521b3ad11e96887550dd3e835ba01cb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Oct 2019 12:13:57 +0000 Subject: [PATCH 133/144] powerpc/32: Split kexec low level code out of misc_32.S Almost half of misc_32.S is dedicated to kexec. That's the relocation function for kexec. Drop it into a dedicated kexec_relocate_32.S Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e235973a1198195763afd3b6baffa548a83f4611.1572351221.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/kexec_relocate_32.S | 500 ++++++++++++++++++++++++ arch/powerpc/kernel/misc_32.S | 491 ----------------------- 3 files changed, 501 insertions(+), 491 deletions(-) create mode 100644 arch/powerpc/kernel/kexec_relocate_32.S diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index bb57d168d6f44c..fadbc1eb25861f 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -82,6 +82,7 @@ obj-$(CONFIG_FA_DUMP) += fadump.o obj-$(CONFIG_PRESERVE_FA_DUMP) += fadump.o ifdef CONFIG_PPC32 obj-$(CONFIG_E500) += idle_e500.o +obj-$(CONFIG_KEXEC_CORE) += kexec_relocate_32.o endif obj-$(CONFIG_PPC_BOOK3S_32) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o obj-$(CONFIG_TAU) += tau_6xx.o diff --git a/arch/powerpc/kernel/kexec_relocate_32.S b/arch/powerpc/kernel/kexec_relocate_32.S new file mode 100644 index 00000000000000..8a8b4887c879c5 --- /dev/null +++ b/arch/powerpc/kernel/kexec_relocate_32.S @@ -0,0 +1,500 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file contains kexec low-level functions. + * + * Copyright (C) 2002-2003 Eric Biederman + * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz + * PPC44x port. Copyright (C) 2011, IBM Corporation + * Author: Suzuki Poulose + */ + +#include +#include +#include +#include +#include + + .text + + /* + * Must be relocatable PIC code callable as a C function. + */ + .globl relocate_new_kernel +relocate_new_kernel: + /* r3 = page_list */ + /* r4 = reboot_code_buffer */ + /* r5 = start_address */ + +#ifdef CONFIG_FSL_BOOKE + + mr r29, r3 + mr r30, r4 + mr r31, r5 + +#define ENTRY_MAPPING_KEXEC_SETUP +#include "fsl_booke_entry_mapping.S" +#undef ENTRY_MAPPING_KEXEC_SETUP + + mr r3, r29 + mr r4, r30 + mr r5, r31 + + li r0, 0 +#elif defined(CONFIG_44x) + + /* Save our parameters */ + mr r29, r3 + mr r30, r4 + mr r31, r5 + +#ifdef CONFIG_PPC_47x + /* Check for 47x cores */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmplwi cr0,r3,PVR_476FPE@h + beq setup_map_47x + cmplwi cr0,r3,PVR_476@h + beq setup_map_47x + cmplwi cr0,r3,PVR_476_ISS@h + beq setup_map_47x +#endif /* CONFIG_PPC_47x */ + +/* + * Code for setting up 1:1 mapping for PPC440x for KEXEC + * + * We cannot switch off the MMU on PPC44x. 
+ * So we: + * 1) Invalidate all the mappings except the one we are running from. + * 2) Create a tmp mapping for our code in the other address space(TS) and + * jump to it. Invalidate the entry we started in. + * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS. + * 4) Jump to the 1:1 mapping in original TS. + * 5) Invalidate the tmp mapping. + * + * - Based on the kexec support code for FSL BookE + * + */ + + /* + * Load the PID with kernel PID (0). + * Also load our MSR_IS and TID to MMUCR for TLB search. + */ + li r3, 0 + mtspr SPRN_PID, r3 + mfmsr r4 + andi. r4,r4,MSR_IS@l + beq wmmucr + oris r3,r3,PPC44x_MMUCR_STS@h +wmmucr: + mtspr SPRN_MMUCR,r3 + sync + + /* + * Invalidate all the TLB entries except the current entry + * where we are running from + */ + bl 0f /* Find our address */ +0: mflr r5 /* Make it accessible */ + tlbsx r23,0,r5 /* Find entry we are in */ + li r4,0 /* Start at TLB entry 0 */ + li r3,0 /* Set PAGEID inval value */ +1: cmpw r23,r4 /* Is this our entry? */ + beq skip /* If so, skip the inval */ + tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ +skip: + addi r4,r4,1 /* Increment */ + cmpwi r4,64 /* Are we done? */ + bne 1b /* If not, repeat */ + isync + + /* Create a temp mapping and jump to it */ + andi. r6, r23, 1 /* Find the index to use */ + addi r24, r6, 1 /* r24 will contain 1 or 2 */ + + mfmsr r9 /* get the MSR */ + rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */ + xori r7, r5, 1 /* Use the other address space */ + + /* Read the current mapping entries */ + tlbre r3, r23, PPC44x_TLB_PAGEID + tlbre r4, r23, PPC44x_TLB_XLAT + tlbre r5, r23, PPC44x_TLB_ATTRIB + + /* Save our current XLAT entry */ + mr r25, r4 + + /* Extract the TLB PageSize */ + li r10, 1 /* r10 will hold PageSize */ + rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */ + + /* XXX: As of now we use 256M, 4K pages */ + cmpwi r11, PPC44x_TLB_256M + bne tlb_4k + rotlwi r10, r10, 28 /* r10 = 256M */ + b write_out +tlb_4k: + cmpwi r11, PPC44x_TLB_4K + bne default + rotlwi r10, r10, 12 /* r10 = 4K */ + b write_out +default: + rotlwi r10, r10, 10 /* r10 = 1K */ + +write_out: + /* + * Write out the tmp 1:1 mapping for this code in other address space + * Fixup EPN = RPN , TS=other address space + */ + insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */ + + /* Write out the tmp mapping entries */ + tlbwe r3, r24, PPC44x_TLB_PAGEID + tlbwe r4, r24, PPC44x_TLB_XLAT + tlbwe r5, r24, PPC44x_TLB_ATTRIB + + subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */ + not r10, r11 /* Mask for PageNum */ + + /* Switch to other address space in MSR */ + insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ + + bl 1f +1: mflr r8 + addi r8, r8, (2f-1b) /* Find the target offset */ + + /* Jump to the tmp mapping */ + mtspr SPRN_SRR0, r8 + mtspr SPRN_SRR1, r9 + rfi + +2: + /* Invalidate the entry we were executing from */ + li r3, 0 + tlbwe r3, r23, PPC44x_TLB_PAGEID + + /* attribute fields. 
rwx for SUPERVISOR mode */ + li r5, 0 + ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) + + /* Create 1:1 mapping in 256M pages */ + xori r7, r7, 1 /* Revert back to Original TS */ + + li r8, 0 /* PageNumber */ + li r6, 3 /* TLB Index, start at 3 */ + +next_tlb: + rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */ + mr r4, r3 /* RPN = EPN */ + ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */ + insrwi r3, r7, 1, 23 /* Set TS from r7 */ + + tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */ + tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */ + tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */ + + addi r8, r8, 1 /* Increment PN */ + addi r6, r6, 1 /* Increment TLB Index */ + cmpwi r8, 8 /* Are we done ? */ + bne next_tlb + isync + + /* Jump to the new mapping 1:1 */ + li r9,0 + insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ + + bl 1f +1: mflr r8 + and r8, r8, r11 /* Get our offset within page */ + addi r8, r8, (2f-1b) + + and r5, r25, r10 /* Get our target PageNum */ + or r8, r8, r5 /* Target jump address */ + + mtspr SPRN_SRR0, r8 + mtspr SPRN_SRR1, r9 + rfi +2: + /* Invalidate the tmp entry we used */ + li r3, 0 + tlbwe r3, r24, PPC44x_TLB_PAGEID + sync + b ppc44x_map_done + +#ifdef CONFIG_PPC_47x + + /* 1:1 mapping for 47x */ + +setup_map_47x: + + /* + * Load the kernel pid (0) to PID and also to MMUCR[TID]. + * Also set the MSR IS->MMUCR STS + */ + li r3, 0 + mtspr SPRN_PID, r3 /* Set PID */ + mfmsr r4 /* Get MSR */ + andi. r4, r4, MSR_IS@l /* TS=1? */ + beq 1f /* If not, leave STS=0 */ + oris r3, r3, PPC47x_MMUCR_STS@h /* Set STS=1 */ +1: mtspr SPRN_MMUCR, r3 /* Put MMUCR */ + sync + + /* Find the entry we are running from */ + bl 2f +2: mflr r23 + tlbsx r23, 0, r23 + tlbre r24, r23, 0 /* TLB Word 0 */ + tlbre r25, r23, 1 /* TLB Word 1 */ + tlbre r26, r23, 2 /* TLB Word 2 */ + + + /* + * Invalidates all the tlb entries by writing to 256 RPNs(r4) + * of 4k page size in all 4 ways (0-3 in r3). + * This would invalidate the entire UTLB including the one we are + * running from. However the shadow TLB entries would help us + * to continue the execution, until we flush them (rfi/isync). + */ + addis r3, 0, 0x8000 /* specify the way */ + addi r4, 0, 0 /* TLB Word0 = (EPN=0, VALID = 0) */ + addi r5, 0, 0 + b clear_utlb_entry + + /* Align the loop to speed things up. from head_44x.S */ + .align 6 + +clear_utlb_entry: + + tlbwe r4, r3, 0 + tlbwe r5, r3, 1 + tlbwe r5, r3, 2 + addis r3, r3, 0x2000 /* Increment the way */ + cmpwi r3, 0 + bne clear_utlb_entry + addis r3, 0, 0x8000 + addis r4, r4, 0x100 /* Increment the EPN */ + cmpwi r4, 0 + bne clear_utlb_entry + + /* Create the entries in the other address space */ + mfmsr r5 + rlwinm r7, r5, 27, 31, 31 /* Get the TS (Bit 26) from MSR */ + xori r7, r7, 1 /* r7 = !TS */ + + insrwi r24, r7, 1, 21 /* Change the TS in the saved TLB word 0 */ + + /* + * write out the TLB entries for the tmp mapping + * Use way '0' so that we could easily invalidate it later. + */ + lis r3, 0x8000 /* Way '0' */ + + tlbwe r24, r3, 0 + tlbwe r25, r3, 1 + tlbwe r26, r3, 2 + + /* Update the msr to the new TS */ + insrwi r5, r7, 1, 26 + + bl 1f +1: mflr r6 + addi r6, r6, (2f-1b) + + mtspr SPRN_SRR0, r6 + mtspr SPRN_SRR1, r5 + rfi + + /* + * Now we are in the tmp address space. + * Create a 1:1 mapping for 0-2GiB in the original TS. 
+ */ +2: + li r3, 0 + li r4, 0 /* TLB Word 0 */ + li r5, 0 /* TLB Word 1 */ + li r6, 0 + ori r6, r6, PPC47x_TLB2_S_RWX /* TLB word 2 */ + + li r8, 0 /* PageIndex */ + + xori r7, r7, 1 /* revert back to original TS */ + +write_utlb: + rotlwi r5, r8, 28 /* RPN = PageIndex * 256M */ + /* ERPN = 0 as we don't use memory above 2G */ + + mr r4, r5 /* EPN = RPN */ + ori r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M) + insrwi r4, r7, 1, 21 /* Insert the TS to Word 0 */ + + tlbwe r4, r3, 0 /* Write out the entries */ + tlbwe r5, r3, 1 + tlbwe r6, r3, 2 + addi r8, r8, 1 + cmpwi r8, 8 /* Have we completed ? */ + bne write_utlb + + /* make sure we complete the TLB write up */ + isync + + /* + * Prepare to jump to the 1:1 mapping. + * 1) Extract page size of the tmp mapping + * DSIZ = TLB_Word0[22:27] + * 2) Calculate the physical address of the address + * to jump to. + */ + rlwinm r10, r24, 0, 22, 27 + + cmpwi r10, PPC47x_TLB0_4K + bne 0f + li r10, 0x1000 /* r10 = 4k */ + bl 1f + +0: + /* Defaults to 256M */ + lis r10, 0x1000 + + bl 1f +1: mflr r4 + addi r4, r4, (2f-1b) /* virtual address of 2f */ + + subi r11, r10, 1 /* offsetmask = Pagesize - 1 */ + not r10, r11 /* Pagemask = ~(offsetmask) */ + + and r5, r25, r10 /* Physical page */ + and r6, r4, r11 /* offset within the current page */ + + or r5, r5, r6 /* Physical address for 2f */ + + /* Switch the TS in MSR to the original one */ + mfmsr r8 + insrwi r8, r7, 1, 26 + + mtspr SPRN_SRR1, r8 + mtspr SPRN_SRR0, r5 + rfi + +2: + /* Invalidate the tmp mapping */ + lis r3, 0x8000 /* Way '0' */ + + clrrwi r24, r24, 12 /* Clear the valid bit */ + tlbwe r24, r3, 0 + tlbwe r25, r3, 1 + tlbwe r26, r3, 2 + + /* Make sure we complete the TLB write and flush the shadow TLB */ + isync + +#endif + +ppc44x_map_done: + + + /* Restore the parameters */ + mr r3, r29 + mr r4, r30 + mr r5, r31 + + li r0, 0 +#else + li r0, 0 + + /* + * Set Machine Status Register to a known status, + * switch the MMU off and jump to 1: in a single step. + */ + + mr r8, r0 + ori r8, r8, MSR_RI|MSR_ME + mtspr SPRN_SRR1, r8 + addi r8, r4, 1f - relocate_new_kernel + mtspr SPRN_SRR0, r8 + sync + rfi + +1: +#endif + /* from this point address translation is turned off */ + /* and interrupts are disabled */ + + /* set a new stack at the bottom of our page... */ + /* (not really needed now) */ + addi r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */ + stw r0, 0(r1) + + /* Do the copies */ + li r6, 0 /* checksum */ + mr r0, r3 + b 1f + +0: /* top, read another word for the indirection page */ + lwzu r0, 4(r3) + +1: + /* is it a destination page? (r8) */ + rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */ + beq 2f + + rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */ + b 0b + +2: /* is it an indirection page? (r3) */ + rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */ + beq 2f + + rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */ + subi r3, r3, 4 + b 0b + +2: /* are we done? */ + rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */ + beq 2f + b 3f + +2: /* is it a source page? (r9) */ + rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */ + beq 0b + + rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */ + + li r7, PAGE_SIZE / 4 + mtctr r7 + subi r9, r9, 4 + subi r8, r8, 4 +9: + lwzu r0, 4(r9) /* do the copy */ + xor r6, r6, r0 + stwu r0, 4(r8) + dcbst 0, r8 + sync + icbi 0, r8 + bdnz 9b + + addi r9, r9, 4 + addi r8, r8, 4 + b 0b + +3: + + /* To be certain of avoiding problems with self-modifying code + * execute a serializing instruction here. 
+ */ + isync + sync + + mfspr r3, SPRN_PIR /* current core we are running on */ + mr r4, r5 /* load physical address of chunk called */ + + /* jump to the entry point, usually the setup routine */ + mtlr r5 + blrl + +1: b 1b + +relocate_new_kernel_end: + + .globl relocate_new_kernel_size +relocate_new_kernel_size: + .long relocate_new_kernel_end - relocate_new_kernel diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index f4e4a1926a7a5b..d80212be86988f 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -6,11 +6,6 @@ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) * and Paul Mackerras. * - * kexec bits: - * Copyright (C) 2002-2003 Eric Biederman - * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz - * PPC44x port. Copyright (C) 2011, IBM Corporation - * Author: Suzuki Poulose */ #include @@ -25,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -494,488 +488,3 @@ _GLOBAL(start_secondary_resume) */ _GLOBAL(__main) blr - -#ifdef CONFIG_KEXEC_CORE - /* - * Must be relocatable PIC code callable as a C function. - */ - .globl relocate_new_kernel -relocate_new_kernel: - /* r3 = page_list */ - /* r4 = reboot_code_buffer */ - /* r5 = start_address */ - -#ifdef CONFIG_FSL_BOOKE - - mr r29, r3 - mr r30, r4 - mr r31, r5 - -#define ENTRY_MAPPING_KEXEC_SETUP -#include "fsl_booke_entry_mapping.S" -#undef ENTRY_MAPPING_KEXEC_SETUP - - mr r3, r29 - mr r4, r30 - mr r5, r31 - - li r0, 0 -#elif defined(CONFIG_44x) - - /* Save our parameters */ - mr r29, r3 - mr r30, r4 - mr r31, r5 - -#ifdef CONFIG_PPC_47x - /* Check for 47x cores */ - mfspr r3,SPRN_PVR - srwi r3,r3,16 - cmplwi cr0,r3,PVR_476FPE@h - beq setup_map_47x - cmplwi cr0,r3,PVR_476@h - beq setup_map_47x - cmplwi cr0,r3,PVR_476_ISS@h - beq setup_map_47x -#endif /* CONFIG_PPC_47x */ - -/* - * Code for setting up 1:1 mapping for PPC440x for KEXEC - * - * We cannot switch off the MMU on PPC44x. - * So we: - * 1) Invalidate all the mappings except the one we are running from. - * 2) Create a tmp mapping for our code in the other address space(TS) and - * jump to it. Invalidate the entry we started in. - * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS. - * 4) Jump to the 1:1 mapping in original TS. - * 5) Invalidate the tmp mapping. - * - * - Based on the kexec support code for FSL BookE - * - */ - - /* - * Load the PID with kernel PID (0). - * Also load our MSR_IS and TID to MMUCR for TLB search. - */ - li r3, 0 - mtspr SPRN_PID, r3 - mfmsr r4 - andi. r4,r4,MSR_IS@l - beq wmmucr - oris r3,r3,PPC44x_MMUCR_STS@h -wmmucr: - mtspr SPRN_MMUCR,r3 - sync - - /* - * Invalidate all the TLB entries except the current entry - * where we are running from - */ - bl 0f /* Find our address */ -0: mflr r5 /* Make it accessible */ - tlbsx r23,0,r5 /* Find entry we are in */ - li r4,0 /* Start at TLB entry 0 */ - li r3,0 /* Set PAGEID inval value */ -1: cmpw r23,r4 /* Is this our entry? */ - beq skip /* If so, skip the inval */ - tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ -skip: - addi r4,r4,1 /* Increment */ - cmpwi r4,64 /* Are we done? */ - bne 1b /* If not, repeat */ - isync - - /* Create a temp mapping and jump to it */ - andi. 
r6, r23, 1 /* Find the index to use */ - addi r24, r6, 1 /* r24 will contain 1 or 2 */ - - mfmsr r9 /* get the MSR */ - rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */ - xori r7, r5, 1 /* Use the other address space */ - - /* Read the current mapping entries */ - tlbre r3, r23, PPC44x_TLB_PAGEID - tlbre r4, r23, PPC44x_TLB_XLAT - tlbre r5, r23, PPC44x_TLB_ATTRIB - - /* Save our current XLAT entry */ - mr r25, r4 - - /* Extract the TLB PageSize */ - li r10, 1 /* r10 will hold PageSize */ - rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */ - - /* XXX: As of now we use 256M, 4K pages */ - cmpwi r11, PPC44x_TLB_256M - bne tlb_4k - rotlwi r10, r10, 28 /* r10 = 256M */ - b write_out -tlb_4k: - cmpwi r11, PPC44x_TLB_4K - bne default - rotlwi r10, r10, 12 /* r10 = 4K */ - b write_out -default: - rotlwi r10, r10, 10 /* r10 = 1K */ - -write_out: - /* - * Write out the tmp 1:1 mapping for this code in other address space - * Fixup EPN = RPN , TS=other address space - */ - insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */ - - /* Write out the tmp mapping entries */ - tlbwe r3, r24, PPC44x_TLB_PAGEID - tlbwe r4, r24, PPC44x_TLB_XLAT - tlbwe r5, r24, PPC44x_TLB_ATTRIB - - subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */ - not r10, r11 /* Mask for PageNum */ - - /* Switch to other address space in MSR */ - insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ - - bl 1f -1: mflr r8 - addi r8, r8, (2f-1b) /* Find the target offset */ - - /* Jump to the tmp mapping */ - mtspr SPRN_SRR0, r8 - mtspr SPRN_SRR1, r9 - rfi - -2: - /* Invalidate the entry we were executing from */ - li r3, 0 - tlbwe r3, r23, PPC44x_TLB_PAGEID - - /* attribute fields. rwx for SUPERVISOR mode */ - li r5, 0 - ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) - - /* Create 1:1 mapping in 256M pages */ - xori r7, r7, 1 /* Revert back to Original TS */ - - li r8, 0 /* PageNumber */ - li r6, 3 /* TLB Index, start at 3 */ - -next_tlb: - rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */ - mr r4, r3 /* RPN = EPN */ - ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */ - insrwi r3, r7, 1, 23 /* Set TS from r7 */ - - tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */ - tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */ - tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */ - - addi r8, r8, 1 /* Increment PN */ - addi r6, r6, 1 /* Increment TLB Index */ - cmpwi r8, 8 /* Are we done ? */ - bne next_tlb - isync - - /* Jump to the new mapping 1:1 */ - li r9,0 - insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ - - bl 1f -1: mflr r8 - and r8, r8, r11 /* Get our offset within page */ - addi r8, r8, (2f-1b) - - and r5, r25, r10 /* Get our target PageNum */ - or r8, r8, r5 /* Target jump address */ - - mtspr SPRN_SRR0, r8 - mtspr SPRN_SRR1, r9 - rfi -2: - /* Invalidate the tmp entry we used */ - li r3, 0 - tlbwe r3, r24, PPC44x_TLB_PAGEID - sync - b ppc44x_map_done - -#ifdef CONFIG_PPC_47x - - /* 1:1 mapping for 47x */ - -setup_map_47x: - - /* - * Load the kernel pid (0) to PID and also to MMUCR[TID]. - * Also set the MSR IS->MMUCR STS - */ - li r3, 0 - mtspr SPRN_PID, r3 /* Set PID */ - mfmsr r4 /* Get MSR */ - andi. r4, r4, MSR_IS@l /* TS=1? 
*/ - beq 1f /* If not, leave STS=0 */ - oris r3, r3, PPC47x_MMUCR_STS@h /* Set STS=1 */ -1: mtspr SPRN_MMUCR, r3 /* Put MMUCR */ - sync - - /* Find the entry we are running from */ - bl 2f -2: mflr r23 - tlbsx r23, 0, r23 - tlbre r24, r23, 0 /* TLB Word 0 */ - tlbre r25, r23, 1 /* TLB Word 1 */ - tlbre r26, r23, 2 /* TLB Word 2 */ - - - /* - * Invalidates all the tlb entries by writing to 256 RPNs(r4) - * of 4k page size in all 4 ways (0-3 in r3). - * This would invalidate the entire UTLB including the one we are - * running from. However the shadow TLB entries would help us - * to continue the execution, until we flush them (rfi/isync). - */ - addis r3, 0, 0x8000 /* specify the way */ - addi r4, 0, 0 /* TLB Word0 = (EPN=0, VALID = 0) */ - addi r5, 0, 0 - b clear_utlb_entry - - /* Align the loop to speed things up. from head_44x.S */ - .align 6 - -clear_utlb_entry: - - tlbwe r4, r3, 0 - tlbwe r5, r3, 1 - tlbwe r5, r3, 2 - addis r3, r3, 0x2000 /* Increment the way */ - cmpwi r3, 0 - bne clear_utlb_entry - addis r3, 0, 0x8000 - addis r4, r4, 0x100 /* Increment the EPN */ - cmpwi r4, 0 - bne clear_utlb_entry - - /* Create the entries in the other address space */ - mfmsr r5 - rlwinm r7, r5, 27, 31, 31 /* Get the TS (Bit 26) from MSR */ - xori r7, r7, 1 /* r7 = !TS */ - - insrwi r24, r7, 1, 21 /* Change the TS in the saved TLB word 0 */ - - /* - * write out the TLB entries for the tmp mapping - * Use way '0' so that we could easily invalidate it later. - */ - lis r3, 0x8000 /* Way '0' */ - - tlbwe r24, r3, 0 - tlbwe r25, r3, 1 - tlbwe r26, r3, 2 - - /* Update the msr to the new TS */ - insrwi r5, r7, 1, 26 - - bl 1f -1: mflr r6 - addi r6, r6, (2f-1b) - - mtspr SPRN_SRR0, r6 - mtspr SPRN_SRR1, r5 - rfi - - /* - * Now we are in the tmp address space. - * Create a 1:1 mapping for 0-2GiB in the original TS. - */ -2: - li r3, 0 - li r4, 0 /* TLB Word 0 */ - li r5, 0 /* TLB Word 1 */ - li r6, 0 - ori r6, r6, PPC47x_TLB2_S_RWX /* TLB word 2 */ - - li r8, 0 /* PageIndex */ - - xori r7, r7, 1 /* revert back to original TS */ - -write_utlb: - rotlwi r5, r8, 28 /* RPN = PageIndex * 256M */ - /* ERPN = 0 as we don't use memory above 2G */ - - mr r4, r5 /* EPN = RPN */ - ori r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M) - insrwi r4, r7, 1, 21 /* Insert the TS to Word 0 */ - - tlbwe r4, r3, 0 /* Write out the entries */ - tlbwe r5, r3, 1 - tlbwe r6, r3, 2 - addi r8, r8, 1 - cmpwi r8, 8 /* Have we completed ? */ - bne write_utlb - - /* make sure we complete the TLB write up */ - isync - - /* - * Prepare to jump to the 1:1 mapping. - * 1) Extract page size of the tmp mapping - * DSIZ = TLB_Word0[22:27] - * 2) Calculate the physical address of the address - * to jump to. 
- */ - rlwinm r10, r24, 0, 22, 27 - - cmpwi r10, PPC47x_TLB0_4K - bne 0f - li r10, 0x1000 /* r10 = 4k */ - bl 1f - -0: - /* Defaults to 256M */ - lis r10, 0x1000 - - bl 1f -1: mflr r4 - addi r4, r4, (2f-1b) /* virtual address of 2f */ - - subi r11, r10, 1 /* offsetmask = Pagesize - 1 */ - not r10, r11 /* Pagemask = ~(offsetmask) */ - - and r5, r25, r10 /* Physical page */ - and r6, r4, r11 /* offset within the current page */ - - or r5, r5, r6 /* Physical address for 2f */ - - /* Switch the TS in MSR to the original one */ - mfmsr r8 - insrwi r8, r7, 1, 26 - - mtspr SPRN_SRR1, r8 - mtspr SPRN_SRR0, r5 - rfi - -2: - /* Invalidate the tmp mapping */ - lis r3, 0x8000 /* Way '0' */ - - clrrwi r24, r24, 12 /* Clear the valid bit */ - tlbwe r24, r3, 0 - tlbwe r25, r3, 1 - tlbwe r26, r3, 2 - - /* Make sure we complete the TLB write and flush the shadow TLB */ - isync - -#endif - -ppc44x_map_done: - - - /* Restore the parameters */ - mr r3, r29 - mr r4, r30 - mr r5, r31 - - li r0, 0 -#else - li r0, 0 - - /* - * Set Machine Status Register to a known status, - * switch the MMU off and jump to 1: in a single step. - */ - - mr r8, r0 - ori r8, r8, MSR_RI|MSR_ME - mtspr SPRN_SRR1, r8 - addi r8, r4, 1f - relocate_new_kernel - mtspr SPRN_SRR0, r8 - sync - rfi - -1: -#endif - /* from this point address translation is turned off */ - /* and interrupts are disabled */ - - /* set a new stack at the bottom of our page... */ - /* (not really needed now) */ - addi r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */ - stw r0, 0(r1) - - /* Do the copies */ - li r6, 0 /* checksum */ - mr r0, r3 - b 1f - -0: /* top, read another word for the indirection page */ - lwzu r0, 4(r3) - -1: - /* is it a destination page? (r8) */ - rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */ - beq 2f - - rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */ - b 0b - -2: /* is it an indirection page? (r3) */ - rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */ - beq 2f - - rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */ - subi r3, r3, 4 - b 0b - -2: /* are we done? */ - rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */ - beq 2f - b 3f - -2: /* is it a source page? (r9) */ - rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */ - beq 0b - - rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */ - - li r7, PAGE_SIZE / 4 - mtctr r7 - subi r9, r9, 4 - subi r8, r8, 4 -9: - lwzu r0, 4(r9) /* do the copy */ - xor r6, r6, r0 - stwu r0, 4(r8) - dcbst 0, r8 - sync - icbi 0, r8 - bdnz 9b - - addi r9, r9, 4 - addi r8, r8, 4 - b 0b - -3: - - /* To be certain of avoiding problems with self-modifying code - * execute a serializing instruction here. - */ - isync - sync - - mfspr r3, SPRN_PIR /* current core we are running on */ - mr r4, r5 /* load physical address of chunk called */ - - /* jump to the entry point, usually the setup routine */ - mtlr r5 - blrl - -1: b 1b - -relocate_new_kernel_end: - - .globl relocate_new_kernel_size -relocate_new_kernel_size: - .long relocate_new_kernel_end - relocate_new_kernel -#endif From 793b08e2efff3ec020c5c5861d00ed394fcdd488 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Oct 2019 12:13:58 +0000 Subject: [PATCH 134/144] powerpc/kexec: Move kexec files into a dedicated subdir. arch/powerpc/kernel/ contains 8 files dedicated to kexec. Move them into a dedicated subdirectory. 
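For readers tracing the relocated code above: the indirection-page walk that relocate_new_kernel implements in assembly corresponds to the C sketch below. The IND_* flag values are the real ones from include/linux/kexec.h; relocate_pages() and copy_page_and_sum() are hypothetical names standing in for the assembly loop and its unrolled copy/checksum/cache-flush body.

  /* Sketch of the kexec indirection walk, assuming 'entry' starts as
   * image->head (normally an IND_INDIRECTION entry, so 'ptr' is set
   * before it is first dereferenced). copy_page_and_sum() is a
   * hypothetical stand-in for the copy/flush loop in the asm above. */
  static void relocate_pages(unsigned long entry)
  {
          unsigned long *ptr = NULL;
          unsigned long dest = 0;

          for (;;) {
                  if (entry & IND_DESTINATION) {          /* 1 << 0 */
                          dest = entry & PAGE_MASK;       /* next copy target */
                  } else if (entry & IND_INDIRECTION) {   /* 1 << 1 */
                          ptr = (unsigned long *)(entry & PAGE_MASK);
                          entry = *ptr++; /* chain to the next list page */
                          continue;
                  } else if (entry & IND_DONE) {          /* 1 << 2 */
                          break;
                  } else if (entry & IND_SOURCE) {        /* 1 << 3 */
                          copy_page_and_sum((void *)dest,
                                            (void *)(entry & PAGE_MASK));
                          dest += PAGE_SIZE;
                  }
                  entry = *ptr++;
          }
  }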
Signed-off-by: Christophe Leroy [mpe: Move to a/p/kexec, drop the 'machine' naming and use 'core' instead] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/afbef97ec6a978574a5cf91a4441000e0a9da42a.1572351221.git.christophe.leroy@c-s.fr --- arch/powerpc/Kbuild | 1 + arch/powerpc/kernel/Makefile | 18 ------------- arch/powerpc/kexec/Makefile | 25 +++++++++++++++++++ .../{kernel/machine_kexec.c => kexec/core.c} | 0 .../machine_kexec_32.c => kexec/core_32.c} | 0 .../machine_kexec_64.c => kexec/core_64.c} | 0 arch/powerpc/{kernel => kexec}/crash.c | 0 .../{kernel/kexec_elf_64.c => kexec/elf_64.c} | 0 .../file_load.c} | 0 .../{kernel/ima_kexec.c => kexec/ima.c} | 0 .../relocate_32.S} | 2 +- 11 files changed, 27 insertions(+), 19 deletions(-) create mode 100644 arch/powerpc/kexec/Makefile rename arch/powerpc/{kernel/machine_kexec.c => kexec/core.c} (100%) rename arch/powerpc/{kernel/machine_kexec_32.c => kexec/core_32.c} (100%) rename arch/powerpc/{kernel/machine_kexec_64.c => kexec/core_64.c} (100%) rename arch/powerpc/{kernel => kexec}/crash.c (100%) rename arch/powerpc/{kernel/kexec_elf_64.c => kexec/elf_64.c} (100%) rename arch/powerpc/{kernel/machine_kexec_file_64.c => kexec/file_load.c} (100%) rename arch/powerpc/{kernel/ima_kexec.c => kexec/ima.c} (100%) rename arch/powerpc/{kernel/kexec_relocate_32.S => kexec/relocate_32.S} (99%) diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild index 51e6908323ad53..5e2f9eaa3ee7d5 100644 --- a/arch/powerpc/Kbuild +++ b/arch/powerpc/Kbuild @@ -14,4 +14,5 @@ obj-$(CONFIG_XMON) += xmon/ obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_PERF_EVENTS) += perf/ +obj-$(CONFIG_KEXEC_CORE) += kexec/ obj-$(CONFIG_KEXEC_FILE) += purgatory/ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fadbc1eb25861f..c1df4e51882980 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -5,9 +5,6 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -# Disable clang warning for using setjmp without setjmp.h header -CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header) - ifdef CONFIG_PPC64 CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) endif @@ -82,7 +79,6 @@ obj-$(CONFIG_FA_DUMP) += fadump.o obj-$(CONFIG_PRESERVE_FA_DUMP) += fadump.o ifdef CONFIG_PPC32 obj-$(CONFIG_E500) += idle_e500.o -obj-$(CONFIG_KEXEC_CORE) += kexec_relocate_32.o endif obj-$(CONFIG_PPC_BOOK3S_32) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o obj-$(CONFIG_TAU) += tau_6xx.o @@ -126,14 +122,6 @@ pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \ pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o -obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \ - machine_kexec_$(BITS).o -obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o -ifdef CONFIG_HAVE_IMA_KEXEC -ifdef CONFIG_IMA -obj-y += ima_kexec.o -endif -endif obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o @@ -168,12 +156,6 @@ obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o GCOV_PROFILE_prom_init.o := n KCOV_INSTRUMENT_prom_init.o := n UBSAN_SANITIZE_prom_init.o := n -GCOV_PROFILE_machine_kexec_64.o := n -KCOV_INSTRUMENT_machine_kexec_64.o := n -UBSAN_SANITIZE_machine_kexec_64.o := n -GCOV_PROFILE_machine_kexec_32.o := n -KCOV_INSTRUMENT_machine_kexec_32.o := n -UBSAN_SANITIZE_machine_kexec_32.o := n GCOV_PROFILE_kprobes.o := n KCOV_INSTRUMENT_kprobes.o := n UBSAN_SANITIZE_kprobes.o := n diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile new file mode 100644 index 
00000000000000..16c1c5a1951980
--- /dev/null
+++ b/arch/powerpc/kexec/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+# Disable clang warning for using setjmp without setjmp.h header
+CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header)
+
+obj-y += core.o crash.o core_$(BITS).o
+
+obj-$(CONFIG_PPC32) += relocate_32.o
+
+obj-$(CONFIG_KEXEC_FILE) += file_load.o elf_$(BITS).o
+
+ifdef CONFIG_HAVE_IMA_KEXEC
+ifdef CONFIG_IMA
+obj-y += ima.o
+endif
+endif
+
+
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
+GCOV_PROFILE_core_$(BITS).o := n
+KCOV_INSTRUMENT_core_$(BITS).o := n
+UBSAN_SANITIZE_core_$(BITS).o := n
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kexec/core.c
similarity index 100%
rename from arch/powerpc/kernel/machine_kexec.c
rename to arch/powerpc/kexec/core.c
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kexec/core_32.c
similarity index 100%
rename from arch/powerpc/kernel/machine_kexec_32.c
rename to arch/powerpc/kexec/core_32.c
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kexec/core_64.c
similarity index 100%
rename from arch/powerpc/kernel/machine_kexec_64.c
rename to arch/powerpc/kexec/core_64.c
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kexec/crash.c
similarity index 100%
rename from arch/powerpc/kernel/crash.c
rename to arch/powerpc/kexec/crash.c
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kexec/elf_64.c
similarity index 100%
rename from arch/powerpc/kernel/kexec_elf_64.c
rename to arch/powerpc/kexec/elf_64.c
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kexec/file_load.c
similarity index 100%
rename from arch/powerpc/kernel/machine_kexec_file_64.c
rename to arch/powerpc/kexec/file_load.c
diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kexec/ima.c
similarity index 100%
rename from arch/powerpc/kernel/ima_kexec.c
rename to arch/powerpc/kexec/ima.c
diff --git a/arch/powerpc/kernel/kexec_relocate_32.S b/arch/powerpc/kexec/relocate_32.S
similarity index 99%
rename from arch/powerpc/kernel/kexec_relocate_32.S
rename to arch/powerpc/kexec/relocate_32.S
index 8a8b4887c879c5..61946c19e07ca8 100644
--- a/arch/powerpc/kernel/kexec_relocate_32.S
+++ b/arch/powerpc/kexec/relocate_32.S
@@ -32,7 +32,7 @@ relocate_new_kernel:
 	mr	r31, r5
 
 #define ENTRY_MAPPING_KEXEC_SETUP
-#include "fsl_booke_entry_mapping.S"
+#include <kernel/fsl_booke_entry_mapping.S>
 #undef ENTRY_MAPPING_KEXEC_SETUP
 
 	mr	r3, r29

From 9d72dcef891030545f39ad386a30cf91df517fb2 Mon Sep 17 00:00:00 2001
From: Oliver O'Halloran
Date: Mon, 18 Nov 2019 17:55:53 +1100
Subject: [PATCH 135/144] powerpc/powernv: Disable native PCIe port management

On PowerNV the PCIe topology is (currently) managed by the powernv
platform code in Linux in cooperation with the platform firmware.
Linux's native PCIe port service drivers operate independently of both
and this can cause problems.

The main issue is that the portbus driver will conflict with the
platform-specific hotplug driver (pnv_php) over ownership of the MSI
used to notify the host when a hotplug event occurs. The portbus driver
claims this MSI on behalf of the individual port services because the
same interrupt is used for hotplug events, PMEs (on root ports), and
link bandwidth change notifications.

The portbus driver will always claim the interrupt even if the
individual port service drivers, such as pciehp, are compiled out.
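This single gate is what the fix below leans on: the port bus driver's init routine checks a global flag before registering anything. A condensed sketch of that logic, simplified from drivers/pci/pcie/portdrv_pci.c (DMI quirk handling and the service driver setup are omitted here):

  static int __init pcie_portdrv_init(void)
  {
          /* The platform owns the PCIe ports: don't bind portdrv, so
           * no port service can claim the hotplug/PME/BW MSI. */
          if (pcie_ports_disabled)
                  return -EACCES;

          return pci_register_driver(&pcie_portdriver);
  }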
The second, bigger, problem is that the hotplug port service driver
fundamentally does not work on PowerNV. The platform assumes that all
PCI devices have a corresponding arch-specific handle derived from the
DT node for the device (pci_dn), and without one the platform will not
allow a PCI device to be enabled. This problem is largely due to
historical baggage, but it can't be resolved without significant
re-factoring of the platform PCI support.

We can fix these problems in the interim by setting the
"pcie_ports_disabled" flag during platform initialisation. The flag
indicates the platform owns the PCIe ports, which stops the portbus
driver from being registered.

This does have the side effect of disabling all port service drivers,
that is: AER, PME, BW notifications, hotplug, and DPC. However, this is
not a huge disadvantage on PowerNV since these services are either
unused or handled through other means.

Fixes: 66725152fb9f ("PCI/hotplug: PowerPC PowerNV PCI hotplug driver")
Signed-off-by: Oliver O'Halloran
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20191118065553.30362-1-oohall@gmail.com
---
 arch/powerpc/platforms/powernv/pci.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 2825d004dece27..c0bea75ac27bfa 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -945,6 +945,23 @@ void __init pnv_pci_init(void)
 	if (!firmware_has_feature(FW_FEATURE_OPAL))
 		return;
 
+#ifdef CONFIG_PCIEPORTBUS
+	/*
+	 * On PowerNV PCIe devices are (currently) managed in cooperation
+	 * with firmware. This isn't *strictly* required, but there are
+	 * enough assumptions baked into both firmware and the platform
+	 * code that it's unwise to allow the portbus services to be used.
+	 *
+	 * We need to fix this eventually, but for now set this flag to
+	 * disable the portbus driver. The AER service isn't required since
+	 * AER events are handled via EEH. The pciehp hotplug driver can't
+	 * work without kernel changes (and portbus binding breaks pnv_php).
+	 * The other services also require some thinking about how we're
+	 * going to integrate them.
+	 */
+	pcie_ports_disabled = true;
+#endif
+
 	/* Look for IODA IO-Hubs. */
 	for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
 		pnv_pci_init_ioda_hub(np);

From bf9c95e23324cbaf2e58fc3f0cbdc73137f2d1ca Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Wed, 20 Nov 2019 13:37:23 +1100
Subject: [PATCH 136/144] selftests/powerpc: spectre_v2 test must be built
 64-bit

The spectre_v2 test must be built 64-bit; it includes hand-written asm
that is 64-bit only, and segfaults if built 32-bit.
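To illustrate the failure mode with a hypothetical snippet (not the test's actual asm): code that hard-codes 64-bit instructions, like the doubleword load below, either fails to assemble or misbehaves at runtime in a 32-bit build, which is why the test needs its own -m64.

  /* 'ld' is a doubleword (64-bit) load; 32-bit PowerPC has no such
   * instruction, so this cannot work when compiled with -m32. */
  static inline unsigned long load_doubleword(const unsigned long *p)
  {
          unsigned long val;

          asm volatile("ld %0, 0(%1)" : "=r"(val) : "r"(p));
          return val;
  }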
Fixes: c790c3d2b0ec ("selftests/powerpc: Add a test of spectre_v2 mitigations") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191120023924.13130-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/security/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index d68e6031695c40..eadbbff50be6c1 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -8,4 +8,6 @@ CFLAGS += -I../../../../../usr/include include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c + +$(OUTPUT)/spectre_v2: CFLAGS += -m64 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S From f2bb86937d86ebcb0e52f95b6d19aba1d850e601 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 12 Sep 2019 13:49:41 +0000 Subject: [PATCH 137/144] powerpc/fixmap: don't clear fixmap area in paging_init() fixmap is intended to map things permanently like the IMMR region on FSL SOC (8xx, 83xx, ...), so don't clear it when initialising paging() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/41c99bc06394a6bc2888631cb98a3ed2ae281ddb.1568295907.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/mem.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 7573002077a67b..bf0c54b1f161d8 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -237,14 +237,6 @@ void __init paging_init(void) unsigned long long total_ram = memblock_phys_mem_size(); phys_addr_t top_of_ram = memblock_end_of_DRAM(); -#ifdef CONFIG_PPC32 - unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1); - unsigned long end = __fix_to_virt(FIX_HOLE); - - for (; v < end; v += PAGE_SIZE) - map_kernel_page(v, 0, __pgprot(0)); /* XXX gross */ -#endif - #ifdef CONFIG_HIGHMEM map_kernel_page(PKMAP_BASE, 0, __pgprot(0)); /* XXX gross */ pkmap_page_table = virt_to_kpte(PKMAP_BASE); From 5f017a56aa5da7f646a858475d57730cd155c9f1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 21 Nov 2019 04:21:01 +0100 Subject: [PATCH 138/144] powerpc: Fix Kconfig indentation Adjust indentation from spaces to tab (+optional two spaces) as in coding style with command like: $ sed -e 's/^ /\t/' -i */Kconfig Signed-off-by: Krzysztof Kozlowski Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1574306461-7646-1-git-send-email-krzk@kernel.org --- arch/powerpc/Kconfig.debug | 18 +++++++++--------- arch/powerpc/platforms/Kconfig.cputype | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index c59920920ddc43..4e1d3984746270 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -122,8 +122,8 @@ config XMON_DEFAULT_RO_MODE depends on XMON default y help - Operate xmon in read-only mode. The cmdline options 'xmon=rw' and - 'xmon=ro' override this default. + Operate xmon in read-only mode. The cmdline options 'xmon=rw' and + 'xmon=ro' override this default. config DEBUGGER bool @@ -222,7 +222,7 @@ config PPC_EARLY_DEBUG_44x help Select this to enable early debugging for IBM 44x chips via the inbuilt serial port. If you enable this, ensure you set - PPC_EARLY_DEBUG_44x_PHYSLOW below to suit your target board. + PPC_EARLY_DEBUG_44x_PHYSLOW below to suit your target board. 
config PPC_EARLY_DEBUG_40x bool "Early serial debugging for IBM/AMCC 40x CPUs" @@ -325,7 +325,7 @@ config PPC_EARLY_DEBUG_44x_PHYSLOW default "0x40000200" help You probably want 0x40000200 for ebony boards and - 0x40000300 for taishan + 0x40000300 for taishan config PPC_EARLY_DEBUG_44x_PHYSHIGH hex "EPRN of early debug UART physical address" @@ -359,9 +359,9 @@ config FAIL_IOMMU If you are unsure, say N. config PPC_PTDUMP - bool "Export kernel pagetable layout to userspace via debugfs" - depends on DEBUG_KERNEL && DEBUG_FS - help + bool "Export kernel pagetable layout to userspace via debugfs" + depends on DEBUG_KERNEL && DEBUG_FS + help This option exports the state of the kernel pagetables to a debugfs file. This is only useful for kernel developers who are working in architecture specific areas of the kernel - probably @@ -390,8 +390,8 @@ config PPC_DEBUG_WX config PPC_FAST_ENDIAN_SWITCH bool "Deprecated fast endian-switch syscall" - depends on DEBUG_KERNEL && PPC_BOOK3S_64 - help + depends on DEBUG_KERNEL && PPC_BOOK3S_64 + help If you're unsure what this is, say N. config KASAN_SHADOW_OFFSET diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 12543e53fa96a5..1e352c2eea7a0d 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -415,13 +415,13 @@ config PPC_MM_SLICES bool config PPC_HAVE_PMU_SUPPORT - bool + bool config PPC_PERF_CTRS - def_bool y - depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT - help - This enables the powerpc-specific perf_event back-end. + def_bool y + depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT + help + This enables the powerpc-specific perf_event back-end. config FORCE_SMP # Allow platforms to force SMP=y by selecting this From 465bfd9c44dea6b55962b5788a23ac87a467c923 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 18 Nov 2019 21:57:10 -0700 Subject: [PATCH 139/144] powerpc: Don't add -mabi= flags when building with Clang When building pseries_defconfig, building vdso32 errors out: error: unknown target ABI 'elfv1' This happens because -m32 in clang changes the target to 32-bit, which does not allow the ABI to be changed. Commit 4dc831aa8813 ("powerpc: Fix compiling a BE kernel with a powerpc64le toolchain") added these flags to fix building big endian kernels with a little endian GCC. Clang doesn't need -mabi because the target triple controls the default value. -mlittle-endian and -mbig-endian manipulate the triple into either powerpc64-* or powerpc64le-*, which properly sets the default ABI. Adding a debug print out in the PPC64TargetInfo constructor after line 383 above shows this: $ echo | ./clang -E --target=powerpc64-linux -mbig-endian -o /dev/null - Default ABI: elfv1 $ echo | ./clang -E --target=powerpc64-linux -mlittle-endian -o /dev/null - Default ABI: elfv2 $ echo | ./clang -E --target=powerpc64le-linux -mbig-endian -o /dev/null - Default ABI: elfv1 $ echo | ./clang -E --target=powerpc64le-linux -mlittle-endian -o /dev/null - Default ABI: elfv2 Don't specify -mabi when building with clang to avoid the build error with -m32 and not change any code generation. -mcall-aixdesc is not an implemented flag in clang so it can be safely excluded as well, see commit 238abecde8ad ("powerpc: Don't use gcc specific options on clang"). pseries_defconfig successfully builds after this patch and powernv_defconfig and ppc44x_defconfig don't regress. 
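Independent of a kernel build, the ABI a toolchain selects can be checked with the small probe below (assumption: _CALL_ELF is the macro GCC and clang predefine on powerpc64 ELF targets, with the value 2 meaning ELFv2):

  #include <stdio.h>

  int main(void)
  {
  #if defined(_CALL_ELF) && _CALL_ELF == 2
          puts("default ABI: elfv2");
  #elif defined(_CALL_ELF)
          puts("default ABI: elfv1");
  #else
          puts("not a powerpc64 ELF target");
  #endif
          return 0;
  }

Compiled with "clang --target=powerpc64-linux" versus "--target=powerpc64le-linux", this should report elfv1 and elfv2 respectively, matching the debug output quoted above.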
Reviewed-by: Daniel Axtens
Signed-off-by: Nathan Chancellor
[mpe: Trim clang links in change log]
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20191119045712.39633-2-natechancellor@gmail.com
---
 arch/powerpc/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index a3ed4f168607be..f35730548e4293 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -91,11 +91,13 @@ MULTIPLEWORD := -mmultiple
 endif
 
 ifdef CONFIG_PPC64
+ifndef CONFIG_CC_IS_CLANG
 cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1)
 cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc)
 aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1)
 aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2
 endif
+endif
 
 ifndef CONFIG_CC_IS_CLANG
 cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align
@@ -141,6 +143,7 @@ endif
 endif
 
 CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no)
+ifndef CONFIG_CC_IS_CLANG
 ifdef CONFIG_CPU_LITTLE_ENDIAN
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
 AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2)
@@ -149,6 +152,7 @@ CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc)
 AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
 endif
+endif
 
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)

From c9029ef9c95765e7b63c4d9aa780674447db1ec0 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor
Date: Mon, 18 Nov 2019 21:57:11 -0700
Subject: [PATCH 140/144] powerpc: Avoid clang warnings around setjmp and
 longjmp

Commit aea447141c7e ("powerpc: Disable -Wbuiltin-requires-header when
setjmp is used") disabled -Wbuiltin-requires-header because of a
warning about the setjmp and longjmp declarations.

r367387 in clang added another diagnostic around this, complaining that
there is no jmp_buf declaration.

In file included from ../arch/powerpc/xmon/xmon.c:47:
../arch/powerpc/include/asm/setjmp.h:10:13: error: declaration of
built-in function 'setjmp' requires the declaration of the 'jmp_buf'
type, commonly provided in the header <setjmp.h>.
[-Werror,-Wincomplete-setjmp-declaration]
extern long setjmp(long *);
            ^
../arch/powerpc/include/asm/setjmp.h:11:13: error: declaration of
built-in function 'longjmp' requires the declaration of the 'jmp_buf'
type, commonly provided in the header <setjmp.h>.
[-Werror,-Wincomplete-setjmp-declaration]
extern void longjmp(long *, long);
            ^
2 errors generated.

We are not using the standard library's longjmp/setjmp implementations
for obvious reasons; make this clear to clang by using -ffreestanding
on these files.

Cc: stable@vger.kernel.org # 4.14+
Suggested-by: Segher Boessenkool
Reviewed-by: Nick Desaulniers
Signed-off-by: Nathan Chancellor
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20191119045712.39633-3-natechancellor@gmail.com
---
 arch/powerpc/kexec/Makefile | 4 ++--
 arch/powerpc/xmon/Makefile  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
index 16c1c5a1951980..378f6108a41469 100644
--- a/arch/powerpc/kexec/Makefile
+++ b/arch/powerpc/kexec/Makefile
@@ -3,8 +3,8 @@
 # Makefile for the linux kernel.
 #
 
-# Disable clang warning for using setjmp without setjmp.h header
-CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header)
+# Avoid clang warnings around longjmp/setjmp declarations
+CFLAGS_crash.o += -ffreestanding
 
 obj-y += core.o crash.o core_$(BITS).o
 
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index f142570ad86066..c3842dbeb1b75e 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for xmon
 
-# Disable clang warning for using setjmp without setjmp.h header
-subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header)
+# Avoid clang warnings around longjmp/setjmp declarations
+subdir-ccflags-y := -ffreestanding
 
 GCOV_PROFILE := n
 KCOV_INSTRUMENT := n

From 8dcd71b45df34d9b903450fab147ee8c1e6c16b5 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor
Date: Mon, 18 Nov 2019 21:57:12 -0700
Subject: [PATCH 141/144] powerpc/prom_init: Use -ffreestanding to avoid a
 reference to bcmp

LLVM revision r374662 gives LLVM the ability to convert certain loops
into a reference to bcmp as an optimization; this breaks
prom_init_check.sh:

  CALL    arch/powerpc/kernel/prom_init_check.sh
Error: External symbol 'bcmp' referenced from prom_init.c
make[2]: *** [arch/powerpc/kernel/Makefile:196: prom_init_check] Error 1

bcmp is defined in lib/string.c as a wrapper for memcmp so this could
be added to the whitelist. However, commit 450e7dd4001f
("powerpc/prom_init: don't use string functions from lib/") copied
memcmp as prom_memcmp to avoid KASAN instrumentation, so having bcmp be
resolved to regular memcmp would break that assumption. Furthermore,
because the compiler is the one that inserted bcmp, we cannot provide
something like prom_bcmp.

To prevent LLVM from being clever with optimizations like this, use
-ffreestanding to tell LLVM we are not hosted, so it is not free to
make such transformations.

Reviewed-by: Nick Desaulniers
Signed-off-by: Nathan Chancellor
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20191119045712.39633-4-natechancellor@gmail.com
---
 arch/powerpc/kernel/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index c1df4e51882980..157b0147921f14 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -20,6 +20,7 @@ CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 
 CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
 CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_prom_init.o += -ffreestanding
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace early boot code

From 6f07048c00fd100ed8cab66c225c157e0b6c0a50 Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Wed, 27 Nov 2019 18:41:26 +1100
Subject: [PATCH 142/144] powerpc: Define arch_is_kernel_initmem_freed() for
 lockdep

Under certain circumstances, we hit a warning in lockdep_register_key:

  if (WARN_ON_ONCE(static_obj(key)))
          return;

This occurs when the key falls into initmem that has since been freed
and can now be reused. This has been observed on boot, and under memory
pressure.

Define arch_is_kernel_initmem_freed(), which allows lockdep to
correctly identify this memory as dynamic.

This fixes a bug picked up by the powerpc64 syzkaller instance where we
hit the WARN via alloc_netdev_mqs.
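To see why the hook fixes the WARN, here is a sketch of the consumer, paraphrased from kernel/locking/lockdep.c's static_obj() (the per-cpu address checks are trimmed here): once freed initmem stops counting as static, the key is registered as a dynamic one instead of tripping the warning.

  static int static_obj(const void *obj)
  {
          unsigned long addr = (unsigned long)obj;

          /* Freed initmem can be reused for dynamic allocations, so an
           * address there must not be treated as a static object. */
          if (arch_is_kernel_initmem_freed(addr))
                  return 0;

          /* Static if inside the kernel image... */
          if (addr >= (unsigned long)_stext && addr < (unsigned long)_end)
                  return 1;

          /* ...or in a module's static data. */
          return is_module_address(addr);
  }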
Reported-by: Qian Cai
Reported-by: ppc syzbot c/o Andrew Donnellan
Signed-off-by: Michael Ellerman
Signed-off-by: Daniel Axtens
Link: https://lore.kernel.org/r/87lfs4f7d6.fsf@dja-thinkpad.axtens.net
---
 arch/powerpc/include/asm/sections.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 5a9b6eb651b6c0..d19871763ed4aa 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -5,8 +5,22 @@
 #include <linux/elf.h>
 #include <linux/uaccess.h>
 
+#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed
+
 #include <asm-generic/sections.h>
 
+extern bool init_mem_is_free;
+
+static inline int arch_is_kernel_initmem_freed(unsigned long addr)
+{
+	if (!init_mem_is_free)
+		return 0;
+
+	return addr >= (unsigned long)__init_begin &&
+		addr < (unsigned long)__init_end;
+}
+
 extern char __head_end[];
 
 #ifdef __powerpc64__

From 6f090192f8225f52ba95d08785989688cb768cca Mon Sep 17 00:00:00 2001
From: YueHaibing
Date: Fri, 15 Nov 2019 21:08:30 +0800
Subject: [PATCH 143/144] x86/efi: remove unused variables

Commit ad723674d675 ("x86/efi: move common keyring handler functions to
new file") left these variables unused.

Fixes: ad723674d675 ("x86/efi: move common keyring handler functions to new file")
Reported-by: Hulk Robot
Signed-off-by: YueHaibing
Link: https://lore.kernel.org/r/20191115130830.13320-1-yuehaibing@huawei.com
---
 security/integrity/platform_certs/load_uefi.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c
index 4369204a19cd96..111898aad56e48 100644
--- a/security/integrity/platform_certs/load_uefi.c
+++ b/security/integrity/platform_certs/load_uefi.c
@@ -11,11 +11,6 @@
 #include "../integrity.h"
 #include "keyring_handler.h"
 
-static efi_guid_t efi_cert_x509_guid __initdata = EFI_CERT_X509_GUID;
-static efi_guid_t efi_cert_x509_sha256_guid __initdata =
-	EFI_CERT_X509_SHA256_GUID;
-static efi_guid_t efi_cert_sha256_guid __initdata = EFI_CERT_SHA256_GUID;
-
 /*
  * Look to see if a UEFI variable called MokIgnoreDB exists and return true if
  * it does.

From 2807273f5e88ed086d7d5d838fdee71e11e5085f Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Thu, 28 Nov 2019 07:59:22 +0000
Subject: [PATCH 144/144] powerpc/fixmap: fix crash with HIGHMEM

Commit f2bb86937d86 ("powerpc/fixmap: don't clear fixmap area in
paging_init()") removed the clearing of the fixmap area in order to
avoid clearing fixmapped areas set earlier.

However, unlike all other users of fixmap, which go through
__set_fixmap(), the HIGHMEM functions use __set_pte_at() directly. This
means the page table must pre-exist, otherwise the following crash can
be encountered due to the lack of an entry in the PGD.
Oops: Kernel access of bad area, sig: 11 [#1]
BE PAGE_SIZE=4K MMU=Hash PowerMac
Modules linked in:
CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0+ #2528
NIP:  c0144ce8 LR: c0144ccc CTR: 00000080
REGS: ef0b5aa0 TRAP: 0300   Not tainted  (5.4.0+)
MSR:  00009032 <EE,ME,IR,DR,RI>  CR: 44282842  XER: 00000000
DAR: fffdf000 DSISR: 42000000
GPR00: c0144ccc ef0b5b58 ef0b0000 fffdf000 fffdf000 00000000 c0000f7c 00000000
GPR08: c0833000 fffdf000 00000000 ef1c53c9 24042842 00000000 00000000 00000000
GPR16: 00000000 00000000 ef7e7358 effe8160 00000000 c08a9660 c0851644 00000004
GPR24: c08c70a8 00002dc2 00000000 00000001 00000201 effe8160 effe8160 00000000
NIP [c0144ce8] prep_new_page+0x138/0x178
LR [c0144ccc] prep_new_page+0x11c/0x178
Call Trace:
[ef0b5b58] [c0144ccc] prep_new_page+0x11c/0x178 (unreliable)
[ef0b5b88] [c0147218] get_page_from_freelist+0x1fc/0xd88
[ef0b5c38] [c0148328] __alloc_pages_nodemask+0xd4/0xbb4
[ef0b5cf8] [c0142ba8] __vmalloc_node_range+0x1b4/0x2e0
[ef0b5d38] [c0142dd0] vzalloc+0x48/0x58
[ef0b5d58] [c0301c8c] check_partition+0x58/0x244
[ef0b5d78] [c02ffe80] blk_add_partitions+0x44/0x2cc
[ef0b5db8] [c01a32d8] bdev_disk_changed+0x68/0xfc
[ef0b5de8] [c01a4494] __blkdev_get+0x290/0x460
[ef0b5e28] [c02fdd40] __device_add_disk+0x480/0x4d8
[ef0b5e68] [c0810688] brd_init+0xc0/0x188
[ef0b5e88] [c0005194] do_one_initcall+0x40/0x19c
[ef0b5ee8] [c07dd4dc] kernel_init_freeable+0x164/0x230
[ef0b5f28] [c0005408] kernel_init+0x18/0x10c
[ef0b5f38] [c0014274] ret_from_kernel_thread+0x14/0x1c

Partially revert that commit to still clear the fixmap area dedicated
to HIGHMEM.

Fixes: f2bb86937d86 ("powerpc/fixmap: don't clear fixmap area in paging_init()")
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/d42fa9747df5afa41e67b08e374c98d3b40529c9.1574927918.git.christophe.leroy@c-s.fr
---
 arch/powerpc/mm/mem.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index bf0c54b1f161d8..b5835a71f10fcf 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -238,6 +238,12 @@ void __init paging_init(void)
 	phys_addr_t top_of_ram = memblock_end_of_DRAM();
 
 #ifdef CONFIG_HIGHMEM
+	unsigned long v = __fix_to_virt(FIX_KMAP_END);
+	unsigned long end = __fix_to_virt(FIX_KMAP_BEGIN);
+
+	for (; v < end; v += PAGE_SIZE)
+		map_kernel_page(v, 0, __pgprot(0));	/* XXX gross */
+
 	map_kernel_page(PKMAP_BASE, 0, __pgprot(0));	/* XXX gross */
 	pkmap_page_table = virt_to_kpte(PKMAP_BASE);
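For the record, the reason HIGHMEM is special: the kmap_atomic() path, condensed below from arch/powerpc/mm/highmem.c (preemption and debug checks elided), installs its PTE directly with __set_pte_at() rather than via __set_fixmap(), so the page table restored by the paging_init() loop above is the only thing backing these fixmap slots.

  void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  {
          unsigned int idx;
          unsigned long vaddr;

          if (!PageHighMem(page))
                  return page_address(page);

          idx = kmap_atomic_idx_push() + KM_TYPE_NR * smp_processor_id();
          vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);

          /* Assumes the PTE page for this fixmap slot already exists */
          __set_pte_at(&init_mm, vaddr, kmap_pte - idx, mk_pte(page, prot), 1);
          local_flush_tlb_page(NULL, vaddr);

          return (void *)vaddr;
  }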