diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled b/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled new file mode 100644 index 00000000000000..e9c2de8b368892 --- /dev/null +++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled @@ -0,0 +1,9 @@ +This ABI is renamed and moved to a new location /sys/kernel/fadump/enabled. + +What: /sys/kernel/fadump_enabled +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Primarily used to identify whether the FADump is enabled in + the kernel or not. +User: Kdump service diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered new file mode 100644 index 00000000000000..0360be39c98e9d --- /dev/null +++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered @@ -0,0 +1,10 @@ +This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.¬ + +What: /sys/kernel/fadump_registered +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read/write + Helps to control the dump collect feature from userspace. + Setting 1 to this file enables the system to collect the + dump and 0 to disable it. +User: Kdump service diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem new file mode 100644 index 00000000000000..6ce0b129ab12db --- /dev/null +++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem @@ -0,0 +1,10 @@ +This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.¬ + +What: /sys/kernel/fadump_release_mem +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + This is a special sysfs file and only available when + the system is booted to capture the vmcore using FADump. + It is used to release the memory reserved by FADump to + save the crash dump. diff --git a/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore b/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore new file mode 100644 index 00000000000000..a8d46cd0f4e61d --- /dev/null +++ b/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore @@ -0,0 +1,9 @@ +This ABI is moved to /sys/firmware/opal/mpipl/release_core. + +What: /sys/kernel/fadump_release_opalcore +Date: Sep 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + The sysfs file is available when the system is booted to + collect the dump on OPAL based machine. It used to release + the memory used to collect the opalcore. diff --git a/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups b/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups new file mode 100644 index 00000000000000..3a2dfe542e8c51 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups @@ -0,0 +1,21 @@ +What: /sys/firmware/opal/sensor_groups +Date: August 2017 +Contact: Linux for PowerPC mailing list +Description: Sensor groups directory for POWER9 powernv servers + + Each folder in this directory contains a sensor group + which are classified based on type of the sensor + like power, temperature, frequency, current, etc. They + can also indicate the group of sensors belonging to + different owners like CSM, Profiler, Job-Scheduler + +What: /sys/firmware/opal/sensor_groups//clear +Date: August 2017 +Contact: Linux for PowerPC mailing list +Description: Sysfs file to clear the min-max of all the sensors + belonging to the group. + + Writing 1 to this file will clear the minimum and + maximum values of all the sensors in the group. + In POWER9, the min-max of a sensor is the historical minimum + and maximum value of the sensor cached by OCC. diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump b/Documentation/ABI/testing/sysfs-kernel-fadump new file mode 100644 index 00000000000000..8f7a64a81783a5 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-fadump @@ -0,0 +1,40 @@ +What: /sys/kernel/fadump/* +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: + The /sys/kernel/fadump/* is a collection of FADump sysfs + file provide information about the configuration status + of Firmware Assisted Dump (FADump). + +What: /sys/kernel/fadump/enabled +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Primarily used to identify whether the FADump is enabled in + the kernel or not. +User: Kdump service + +What: /sys/kernel/fadump/registered +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read/write + Helps to control the dump collect feature from userspace. + Setting 1 to this file enables the system to collect the + dump and 0 to disable it. +User: Kdump service + +What: /sys/kernel/fadump/release_mem +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + This is a special sysfs file and only available when + the system is booted to capture the vmcore using FADump. + It is used to release the memory reserved by FADump to + save the crash dump. + +What: /sys/kernel/fadump/mem_reserved +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Provide information about the amount of memory reserved by + FADump to save the crash dump in bytes. diff --git a/Documentation/powerpc/firmware-assisted-dump.rst b/Documentation/powerpc/firmware-assisted-dump.rst index 0455a78486d5ec..b3f3ee135dbe80 100644 --- a/Documentation/powerpc/firmware-assisted-dump.rst +++ b/Documentation/powerpc/firmware-assisted-dump.rst @@ -112,13 +112,13 @@ to ensure that crash data is preserved to process later. -- On OPAL based machines (PowerNV), if the kernel is build with CONFIG_OPAL_CORE=y, OPAL memory at the time of crash is also - exported as /sys/firmware/opal/core file. This procfs file is + exported as /sys/firmware/opal/mpipl/core file. This procfs file is helpful in debugging OPAL crashes with GDB. The kernel memory used for exporting this procfs file can be released by echo'ing - '1' to /sys/kernel/fadump_release_opalcore node. + '1' to /sys/firmware/opal/mpipl/release_core node. e.g. - # echo 1 > /sys/kernel/fadump_release_opalcore + # echo 1 > /sys/firmware/opal/mpipl/release_core Implementation details: ----------------------- @@ -268,6 +268,11 @@ Here is the list of files under kernel sysfs: be handled and vmcore will not be captured. This interface can be easily integrated with kdump service start/stop. + /sys/kernel/fadump/mem_reserved + + This is used to display the memory reserved by FADump for saving the + crash dump. + /sys/kernel/fadump_release_mem This file is available only when FADump is active during second kernel. This is used to release the reserved memory @@ -283,14 +288,29 @@ Here is the list of files under kernel sysfs: enhanced to use this interface to release the memory reserved for dump and continue without 2nd reboot. - /sys/kernel/fadump_release_opalcore +Note: /sys/kernel/fadump_release_opalcore sysfs has moved to + /sys/firmware/opal/mpipl/release_core + + /sys/firmware/opal/mpipl/release_core This file is available only on OPAL based machines when FADump is active during capture kernel. This is used to release the memory - used by the kernel to export /sys/firmware/opal/core file. To + used by the kernel to export /sys/firmware/opal/mpipl/core file. To release this memory, echo '1' to it: - echo 1 > /sys/kernel/fadump_release_opalcore + echo 1 > /sys/firmware/opal/mpipl/release_core + +Note: The following FADump sysfs files are deprecated. + ++----------------------------------+--------------------------------+ +| Deprecated | Alternative | ++----------------------------------+--------------------------------+ +| /sys/kernel/fadump_enabled | /sys/kernel/fadump/enabled | ++----------------------------------+--------------------------------+ +| /sys/kernel/fadump_registered | /sys/kernel/fadump/registered | ++----------------------------------+--------------------------------+ +| /sys/kernel/fadump_release_mem | /sys/kernel/fadump/release_mem | ++----------------------------------+--------------------------------+ Here is the list of files under powerpc debugfs: (Assuming debugfs is mounted on /sys/kernel/debug directory.) diff --git a/MAINTAINERS b/MAINTAINERS index a9ebda622ac322..534a8dc4f84a6e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9692,17 +9692,16 @@ F: include/uapi/linux/lightnvm.h LINUX FOR POWER MACINTOSH M: Benjamin Herrenschmidt -W: http://www.penguinppc.org/ L: linuxppc-dev@lists.ozlabs.org -S: Maintained +S: Odd Fixes F: arch/powerpc/platforms/powermac/ F: drivers/macintosh/ LINUX FOR POWERPC (32-BIT AND 64-BIT) -M: Benjamin Herrenschmidt -M: Paul Mackerras M: Michael Ellerman -W: https://github.com/linuxppc/linux/wiki +R: Benjamin Herrenschmidt +R: Paul Mackerras +W: https://github.com/linuxppc/wiki/wiki L: linuxppc-dev@lists.ozlabs.org Q: http://patchwork.ozlabs.org/project/linuxppc-dev/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git @@ -9719,6 +9718,8 @@ F: drivers/crypto/vmx/ F: drivers/i2c/busses/i2c-opal.c F: drivers/net/ethernet/ibm/ibmveth.* F: drivers/net/ethernet/ibm/ibmvnic.* +F: drivers/*/*/*pasemi* +F: drivers/*/*pasemi* F: drivers/pci/hotplug/pnv_php.c F: drivers/pci/hotplug/rpa* F: drivers/rtc/rtc-opal.c @@ -9735,51 +9736,31 @@ N: pseries LINUX FOR POWERPC EMBEDDED MPC5XXX M: Anatolij Gustschin L: linuxppc-dev@lists.ozlabs.org -T: git git://git.denx.de/linux-denx-agust.git -S: Maintained +S: Odd Fixes F: arch/powerpc/platforms/512x/ F: arch/powerpc/platforms/52xx/ LINUX FOR POWERPC EMBEDDED PPC4XX -M: Alistair Popple -M: Matt Porter -W: http://www.penguinppc.org/ L: linuxppc-dev@lists.ozlabs.org -S: Maintained +S: Orphan F: arch/powerpc/platforms/40x/ F: arch/powerpc/platforms/44x/ LINUX FOR POWERPC EMBEDDED PPC83XX AND PPC85XX M: Scott Wood -M: Kumar Gala -W: http://www.penguinppc.org/ L: linuxppc-dev@lists.ozlabs.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git -S: Maintained +S: Odd fixes F: arch/powerpc/platforms/83xx/ F: arch/powerpc/platforms/85xx/ F: Documentation/devicetree/bindings/powerpc/fsl/ LINUX FOR POWERPC EMBEDDED PPC8XX -M: Vitaly Bordug -W: http://www.penguinppc.org/ +M: Christophe Leroy L: linuxppc-dev@lists.ozlabs.org S: Maintained F: arch/powerpc/platforms/8xx/ -LINUX FOR POWERPC EMBEDDED XILINX VIRTEX -L: linuxppc-dev@lists.ozlabs.org -S: Orphan -F: arch/powerpc/*/*virtex* -F: arch/powerpc/*/*/*virtex* - -LINUX FOR POWERPC PA SEMI PWRFICIENT -L: linuxppc-dev@lists.ozlabs.org -S: Orphan -F: arch/powerpc/platforms/pasemi/ -F: drivers/*/*pasemi* -F: drivers/*/*/*pasemi* - LINUX KERNEL DUMP TEST MODULE (LKDTM) M: Kees Cook S: Maintained @@ -12667,16 +12648,6 @@ W: http://wireless.kernel.org/en/users/Drivers/p54 S: Maintained F: drivers/net/wireless/intersil/p54/ -PA SEMI ETHERNET DRIVER -L: netdev@vger.kernel.org -S: Orphan -F: drivers/net/ethernet/pasemi/* - -PA SEMI SMBUS DRIVER -L: linux-i2c@vger.kernel.org -S: Orphan -F: drivers/i2c/busses/i2c-pasemi.c - PACKING M: Vladimir Oltean L: netdev@vger.kernel.org diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index f35730548e4293..f310c32e88a4ec 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -239,10 +239,8 @@ KBUILD_CFLAGS += $(call cc-option,-mno-vsx) KBUILD_CFLAGS += $(call cc-option,-mno-spe) KBUILD_CFLAGS += $(call cc-option,-mspe=no) -# FIXME: the module load should be taught about the additional relocs -# generated by this. -# revert to pre-gcc-4.4 behaviour of .eh_frame -KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm) +# Don't emit .eh_frame since we have no use for it +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Never use string load/store instructions as they are # often slow when they are implemented at all @@ -298,6 +296,7 @@ $(BOOT_TARGETS2): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) +PHONY += bootwrapper_install bootwrapper_install: $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) @@ -403,9 +402,11 @@ define archhelp @echo ' (minus the .dts extension).' endef +PHONY += install install: $(Q)$(MAKE) $(build)=$(boot) install +PHONY += vdso_install vdso_install: ifdef CONFIG_PPC64 $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ @@ -425,6 +426,7 @@ archheaders: ifdef CONFIG_STACKPROTECTOR prepare: stack_protector_prepare +PHONY += stack_protector_prepare stack_protector_prepare: prepare0 ifdef CONFIG_PPC64 $(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' include/generated/asm-offsets.h)) @@ -436,10 +438,12 @@ endif ifdef CONFIG_SMP prepare: task_cpu_prepare +PHONY += task_cpu_prepare task_cpu_prepare: prepare0 $(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == "TASK_CPU") print $$3;}' include/generated/asm-offsets.h)) endif +PHONY += checkbin # Check toolchain versions: # - gcc-4.6 is the minimum kernel-wide version so nothing required. checkbin: diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 0556bf4fc9e996..c53a1b8bba8be3 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -445,6 +445,8 @@ install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) zInstall: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^ +PHONY += install zInstall + # anything not in $(targets) clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \ zImage zImage.initrd zImage.chrp zImage.coff zImage.holly \ diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 92608f34d3123a..1d83966f5ef640 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -44,9 +44,6 @@ p_end: .long _end p_pstack: .long _platform_stack_top #endif - .globl _zimage_start - /* Clang appears to require the .weak directive to be after the symbol - * is defined. See https://bugs.llvm.org/show_bug.cgi?id=38921 */ .weak _zimage_start _zimage_start: .globl _zimage_start_lib diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 983c0084fb3fa2..7d81e86a1e5dd3 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -97,6 +97,10 @@ ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, unsigned long __init early_init(unsigned long dt_ptr); void __init machine_init(u64 dt_ptr); #endif +long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs); +notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs); +notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr); +notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr); long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low); @@ -104,14 +108,6 @@ long sys_switch_endian(void); notrace unsigned int __check_irq_replay(void); void notrace restore_interrupts(void); -/* ptrace */ -long do_syscall_trace_enter(struct pt_regs *regs); -void do_syscall_trace_leave(struct pt_regs *regs); - -/* process */ -void restore_math(struct pt_regs *regs); -void restore_tm_state(struct pt_regs *regs); - /* prom_init (OpenFirmware) */ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long pp, @@ -122,9 +118,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, void __init early_setup(unsigned long dt_ptr); void early_setup_secondary(void); -/* time */ -void accumulate_stolen_time(void); - /* misc runtime */ extern u64 __bswapdi2(u64); extern s64 __lshrdi3(s64, int); diff --git a/arch/powerpc/include/asm/book3s/32/hash.h b/arch/powerpc/include/asm/book3s/32/hash.h index 2a0a467d29855a..34a7215ae81ece 100644 --- a/arch/powerpc/include/asm/book3s/32/hash.h +++ b/arch/powerpc/include/asm/book3s/32/hash.h @@ -17,9 +17,9 @@ * updating the accessed and modified bits in the page table tree. */ -#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ -#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ -#define _PAGE_USER 0x004 /* usermode access allowed */ +#define _PAGE_USER 0x001 /* usermode access allowed */ +#define _PAGE_RW 0x002 /* software: user write access allowed */ +#define _PAGE_PRESENT 0x004 /* software: pte contains a translation */ #define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ #define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ #define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ @@ -27,7 +27,7 @@ #define _PAGE_DIRTY 0x080 /* C: page changed */ #define _PAGE_ACCESSED 0x100 /* R: page referenced */ #define _PAGE_EXEC 0x200 /* software: exec allowed */ -#define _PAGE_RW 0x400 /* software: user write access allowed */ +#define _PAGE_HASHPTE 0x400 /* hash_page has made an HPTE for this pte */ #define _PAGE_SPECIAL 0x800 /* software: Special page */ #ifdef CONFIG_PTE_64BIT diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 5b39c11e884a96..7549393c4c4369 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -366,10 +366,8 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, addr) \ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr)) -#define pte_offset_map(dir, addr) \ - ((pte_t *)(kmap_atomic(pmd_page(*(dir))) + \ - (pmd_page_vaddr(*(dir)) & ~PAGE_MASK)) + pte_index(addr)) -#define pte_unmap(pte) kunmap_atomic(pte) +#define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) +static inline void pte_unmap(pte_t *pte) { } /* * Encode and decode a swap entry. diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 8fd8599c9395c7..3f9ae3585ab98f 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -156,6 +156,12 @@ extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, extern int hash__has_transparent_hugepage(void); #endif +static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd) +{ + BUG(); + return pmd; +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_4K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index d1d9177d9ebdda..0729c034e56f04 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -246,7 +246,7 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, */ static inline int hash__pmd_trans_huge(pmd_t pmd) { - return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) == + return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)) == (_PAGE_PTE | H_PAGE_THP_HUGE)); } @@ -272,6 +272,12 @@ extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); extern int hash__has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)); +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 90dd3a3fc8c77b..3bcef989a35dfc 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -3,6 +3,7 @@ #define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H #include +#include #define AMR_KUAP_BLOCK_READ UL(0x4000000000000000) #define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000) @@ -56,7 +57,20 @@ #ifdef CONFIG_PPC_KUAP -#include +#include +#include + +static inline void kuap_restore_amr(struct pt_regs *regs) +{ + if (mmu_has_feature(MMU_FTR_RADIX_KUAP)) + mtspr(SPRN_AMR, regs->kuap); +} + +static inline void kuap_check_amr(void) +{ + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_RADIX_KUAP)) + WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); +} /* * We support individually allowing read or write, but we don't support nesting @@ -127,6 +141,14 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); } +#else /* CONFIG_PPC_KUAP */ +static inline void kuap_restore_amr(struct pt_regs *regs) +{ +} + +static inline void kuap_check_amr(void) +{ +} #endif /* CONFIG_PPC_KUAP */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 201a69e6a355c5..368b136517e04e 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1303,7 +1303,9 @@ extern void serialize_against_pte_lookup(struct mm_struct *mm); static inline pmd_t pmd_mkdevmap(pmd_t pmd) { - return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP)); + if (radix_enabled()) + return radix__pmd_mkdevmap(pmd); + return hash__pmd_mkdevmap(pmd); } static inline int pmd_devmap(pmd_t pmd) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index d97db3ad9aae21..a1c60d5b50af7d 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -263,6 +263,11 @@ static inline int radix__has_transparent_hugepage(void) } #endif +static inline pmd_t radix__pmd_mkdevmap(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP)); +} + extern int __meminit radix__vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 72b81015cebe9a..609cab1d58f285 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -97,7 +97,7 @@ static inline u32 l1_icache_bytes(void) #endif -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(.data..read_mostly) #ifdef CONFIG_PPC_BOOK3S_32 extern long _get_L2CR(void); diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 4a1c9f0200e1bf..e92191b390f310 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -65,17 +65,13 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop) unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; - if (IS_ENABLED(CONFIG_PPC64)) { + if (IS_ENABLED(CONFIG_PPC64)) mb(); /* sync */ - isync(); - } for (i = 0; i < size >> shift; i++, addr += bytes) dcbf(addr); mb(); /* sync */ - if (IS_ENABLED(CONFIG_PPC64)) - isync(); } /* diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 2431b4ada2fa7e..0fccd5ea1e9a27 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -43,9 +43,12 @@ static inline unsigned long cputime_to_usecs(const cputime_t ct) */ #ifdef CONFIG_PPC64 #define get_accounting(tsk) (&get_paca()->accounting) +#define raw_get_accounting(tsk) (&local_paca->accounting) static inline void arch_vtime_task_switch(struct task_struct *tsk) { } + #else #define get_accounting(tsk) (&task_thread_info(tsk)->accounting) +#define raw_get_accounting(tsk) get_accounting(tsk) /* * Called from the context switch with interrupts disabled, to charge all * accumulated times to the current process, and to prepare accounting on @@ -60,6 +63,36 @@ static inline void arch_vtime_task_switch(struct task_struct *prev) } #endif +/* + * account_cpu_user_entry/exit runs "unreconciled", so can't trace, + * can't use use get_paca() + */ +static notrace inline void account_cpu_user_entry(void) +{ + unsigned long tb = mftb(); + struct cpu_accounting_data *acct = raw_get_accounting(current); + + acct->utime += (tb - acct->starttime_user); + acct->starttime = tb; +} + +static notrace inline void account_cpu_user_exit(void) +{ + unsigned long tb = mftb(); + struct cpu_accounting_data *acct = raw_get_accounting(current); + + acct->stime += (tb - acct->starttime); + acct->starttime_user = tb; +} + + #endif /* __KERNEL__ */ +#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ +static inline void account_cpu_user_entry(void) +{ +} +static inline void account_cpu_user_exit(void) +{ +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* __POWERPC_CPUTIME_H */ diff --git a/arch/powerpc/include/asm/dma.h b/arch/powerpc/include/asm/dma.h index 1b4f0254868f93..6161a9596196f6 100644 --- a/arch/powerpc/include/asm/dma.h +++ b/arch/powerpc/include/asm/dma.h @@ -151,10 +151,9 @@ #define DMA2_EXT_REG 0x4D6 #ifndef __powerpc64__ - /* in arch/ppc/kernel/setup.c -- Cort */ + /* in arch/powerpc/kernel/setup_32.c -- Cort */ extern unsigned int DMA_MODE_WRITE; extern unsigned int DMA_MODE_READ; - extern unsigned long ISA_DMA_THRESHOLD; #else #define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ #define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 3d76e1c388c291..28c3d936fdf32a 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -27,12 +27,12 @@ struct drmem_lmb_info { extern struct drmem_lmb_info *drmem_info; #define for_each_drmem_lmb_in_range(lmb, start, end) \ - for ((lmb) = (start); (lmb) <= (end); (lmb)++) + for ((lmb) = (start); (lmb) < (end); (lmb)++) #define for_each_drmem_lmb(lmb) \ for_each_drmem_lmb_in_range((lmb), \ &drmem_info->lmbs[0], \ - &drmem_info->lmbs[drmem_info->n_lmbs - 1]) + &drmem_info->lmbs[drmem_info->n_lmbs]) /* * The of_drconf_cell_v1 struct defines the layout of the LMB data diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 6f9b2a12540a28..964a54292b365b 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -215,7 +215,7 @@ enum { struct eeh_ops { char *name; int (*init)(void); - void* (*probe)(struct pci_dn *pdn, void *data); + struct eeh_dev *(*probe)(struct pci_dev *pdev); int (*set_option)(struct eeh_pe *pe, int option); int (*get_pe_addr)(struct eeh_pe *pe); int (*get_state)(struct eeh_pe *pe, int *delay); @@ -301,11 +301,7 @@ int __exit eeh_ops_unregister(const char *name); int eeh_check_failure(const volatile void __iomem *token); int eeh_dev_check_failure(struct eeh_dev *edev); void eeh_addr_cache_init(void); -void eeh_add_device_early(struct pci_dn *); -void eeh_add_device_tree_early(struct pci_dn *); -void eeh_add_device_late(struct pci_dev *); -void eeh_add_device_tree_late(struct pci_bus *); -void eeh_add_sysfs_files(struct pci_bus *); +void eeh_probe_device(struct pci_dev *pdev); void eeh_remove_device(struct pci_dev *); int eeh_unfreeze_pe(struct eeh_pe *pe); int eeh_pe_reset_and_recover(struct eeh_pe *pe); @@ -360,15 +356,7 @@ static inline int eeh_check_failure(const volatile void __iomem *token) static inline void eeh_addr_cache_init(void) { } -static inline void eeh_add_device_early(struct pci_dn *pdn) { } - -static inline void eeh_add_device_tree_early(struct pci_dn *pdn) { } - -static inline void eeh_add_device_late(struct pci_dev *dev) { } - -static inline void eeh_add_device_tree_late(struct pci_bus *bus) { } - -static inline void eeh_add_sysfs_files(struct pci_bus *bus) { } +static inline void eeh_probe_device(struct pci_dev *dev) { } static inline void eeh_remove_device(struct pci_dev *dev) { } @@ -376,6 +364,14 @@ static inline void eeh_remove_device(struct pci_dev *dev) { } #define EEH_IO_ERROR_VALUE(size) (-1UL) #endif /* CONFIG_EEH */ +#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_EEH) +void pseries_eeh_init_edev(struct pci_dn *pdn); +void pseries_eeh_init_edev_recursive(struct pci_dn *pdn); +#else +static inline void pseries_eeh_add_device_early(struct pci_dn *pdn) { } +static inline void pseries_eeh_add_device_tree_early(struct pci_dn *pdn) { } +#endif + #ifdef CONFIG_PPC64 /* * MMIO read/write operations with EEH support. diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 33f4f72eb035b9..47bd4ea0837d4c 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -33,11 +33,7 @@ #include /* PACA save area size in u64 units (exgen, exmc, etc) */ -#if defined(CONFIG_RELOCATABLE) #define EX_SIZE 10 -#else -#define EX_SIZE 9 -#endif /* * maximum recursive depth of MCE exceptions diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index e3a905e3d5736e..e0e71777961ff8 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -52,7 +52,7 @@ #ifndef __ASSEMBLY__ extern void replay_system_reset(void); -extern void __replay_interrupt(unsigned int vector); +extern void replay_soft_interrupts(void); extern void timer_interrupt(struct pt_regs *); extern void timer_broadcast_interrupt(void); @@ -228,9 +228,13 @@ static inline bool arch_irqs_disabled(void) #ifdef CONFIG_PPC_BOOK3E #define __hard_irq_enable() wrtee(MSR_EE) #define __hard_irq_disable() wrtee(0) +#define __hard_EE_RI_disable() wrtee(0) +#define __hard_RI_enable() do { } while (0) #else #define __hard_irq_enable() __mtmsrd(MSR_EE|MSR_RI, 1) #define __hard_irq_disable() __mtmsrd(MSR_RI, 1) +#define __hard_EE_RI_disable() __mtmsrd(0, 1) +#define __hard_RI_enable() __mtmsrd(MSR_RI, 1) #endif #define hard_irq_disable() do { \ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index f99b4333dfbaeb..1dc63101ffe185 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -309,8 +309,6 @@ struct kvm_arch { pgd_t *pgtable; u64 process_table; struct dentry *debugfs_dir; - struct dentry *htab_dentry; - struct dentry *radix_dentry; struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -831,7 +829,6 @@ struct kvm_vcpu_arch { struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ struct dentry *debugfs_dir; - struct dentry *debugfs_timings; #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ }; diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 6a6ddaabdb34db..376a395daf3299 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -218,6 +218,8 @@ extern void machine_check_queue_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest); unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); +extern void mce_common_process_ue(struct pt_regs *regs, + struct mce_error_info *mce_err); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 60c4d829152e0d..b04ba257fddbc7 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -372,10 +372,8 @@ static inline int pte_young(pte_t pte) #define pte_offset_kernel(dir, addr) \ (pmd_bad(*(dir)) ? NULL : (pte_t *)pmd_page_vaddr(*(dir)) + \ pte_index(addr)) -#define pte_offset_map(dir, addr) \ - ((pte_t *)(kmap_atomic(pmd_page(*(dir))) + \ - (pmd_page_vaddr(*(dir)) & ~PAGE_MASK)) + pte_index(addr)) -#define pte_unmap(pte) kunmap_atomic(pte) +#define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) +static inline void pte_unmap(pte_t *pte) { } /* * Encode and decode a swap entry. diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index c1f25a760eb13b..1dffa3cb16bacf 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1067,6 +1067,7 @@ enum { OPAL_REBOOT_PLATFORM_ERROR = 1, OPAL_REBOOT_FULL_IPL = 2, OPAL_REBOOT_MPIPL = 3, + OPAL_REBOOT_FAST = 4, }; /* Argument to OPAL_PCI_TCE_KILL */ diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index 7426d7a90e1e1a..eed3954082fa24 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -32,7 +32,7 @@ do { \ (regs)->result = 0; \ (regs)->nip = __ip; \ - (regs)->gpr[1] = current_stack_pointer(); \ + (regs)->gpr[1] = current_stack_frame(); \ asm volatile("mfmsr %0" : "=r" ((regs)->msr)); \ } while (0) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 8cc543ed114c7d..b1f1d533973559 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -41,6 +41,25 @@ struct mm_struct; #ifndef __ASSEMBLY__ +#ifdef CONFIG_PPC32 +static inline pmd_t *pmd_ptr(struct mm_struct *mm, unsigned long va) +{ + return pmd_offset(pud_offset(pgd_offset(mm, va), va), va); +} + +static inline pmd_t *pmd_ptr_k(unsigned long va) +{ + return pmd_offset(pud_offset(pgd_offset_k(va), va), va); +} + +static inline pte_t *virt_to_kpte(unsigned long vaddr) +{ + pmd_t *pmd = pmd_ptr_k(vaddr); + + return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); +} +#endif + #include /* Keep these as a macros to avoid include dependency mess */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index ee3ada66deb58c..e0195e6b892bcc 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -138,6 +138,9 @@ extern unsigned long profile_pc(struct pt_regs *regs); #define profile_pc(regs) instruction_pointer(regs) #endif +long do_syscall_trace_enter(struct pt_regs *regs); +void do_syscall_trace_leave(struct pt_regs *regs); + #define kernel_stack_pointer(regs) ((regs)->gpr[1]) static inline int is_syscall_success(struct pt_regs *regs) { @@ -276,6 +279,8 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, #endif /* __ASSEMBLY__ */ #ifndef __powerpc64__ +/* We need PT_SOFTE defined at all time to avoid #ifdefs */ +#define PT_SOFTE PT_MQ #else /* __powerpc64__ */ #define PT_FPSCR32 (PT_FPR0 + 2*32 + 1) /* each FP reg occupies 2 32-bit userspace slots */ #define PT_VR0_32 164 /* each Vector reg occupies 4 slots in 32-bit */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1aa46dff095758..da5cab038e25f5 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1448,7 +1448,9 @@ static inline void mtsrin(u32 val, u32 idx) #define proc_trap() asm volatile("trap") -extern unsigned long current_stack_pointer(void); +extern unsigned long current_stack_frame(void); + +register unsigned long current_stack_pointer asm("r1"); extern unsigned long scom970_read(unsigned int address); extern void scom970_write(unsigned int address, unsigned long value); diff --git a/arch/powerpc/include/asm/setjmp.h b/arch/powerpc/include/asm/setjmp.h index e9f81bb3f83b08..f798e80e410619 100644 --- a/arch/powerpc/include/asm/setjmp.h +++ b/arch/powerpc/include/asm/setjmp.h @@ -7,7 +7,9 @@ #define JMP_BUF_LEN 23 -extern long setjmp(long *) __attribute__((returns_twice)); -extern void longjmp(long *, long) __attribute__((noreturn)); +typedef long jmp_buf[JMP_BUF_LEN]; + +extern int setjmp(jmp_buf env) __attribute__((returns_twice)); +extern void longjmp(jmp_buf env, int val) __attribute__((noreturn)); #endif /* _ASM_POWERPC_SETJMP_H */ diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h index 0803ca8b91497e..99e1c6de27bc16 100644 --- a/arch/powerpc/include/asm/signal.h +++ b/arch/powerpc/include/asm/signal.h @@ -6,4 +6,7 @@ #include #include +struct pt_regs; +void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); + #endif /* _ASM_POWERPC_SIGNAL_H */ diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 5b03d8a82409ac..b867b58b109307 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -5,6 +5,7 @@ #ifndef _ASM_POWERPC_SWITCH_TO_H #define _ASM_POWERPC_SWITCH_TO_H +#include #include struct thread_struct; @@ -22,6 +23,16 @@ extern void switch_booke_debug_regs(struct debug_reg *new_debug); extern int emulate_altivec(struct pt_regs *); +#ifdef CONFIG_PPC_BOOK3S_64 +void restore_math(struct pt_regs *regs); +#else +static inline void restore_math(struct pt_regs *regs) +{ +} +#endif + +void restore_tm_state(struct pt_regs *regs); + extern void flush_all_to_thread(struct task_struct *); extern void giveup_all(struct task_struct *); diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 08dbe3e6831c53..39ce95016a3acf 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -24,7 +24,6 @@ extern struct clock_event_device decrementer_clockevent; extern void generic_calibrate_decr(void); -extern void hdec_interrupt(struct pt_regs *regs); /* Some sane defaults: 125 MHz timebase, 1GHz processor */ extern unsigned long ppc_proc_freq; @@ -195,5 +194,8 @@ DECLARE_PER_CPU(u64, decrementers_next_tb); /* Convert timebase ticks to nanoseconds */ unsigned long long tb_to_ns(unsigned long long tb_ticks); +/* SPLPAR */ +void accumulate_stolen_time(void); + #endif /* __KERNEL__ */ #endif /* __POWERPC_TIME_H */ diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 2f7e1ea5089e98..2db7ba78972001 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -98,7 +98,6 @@ extern int stop_topology_update(void); extern int prrn_is_enabled(void); extern int find_and_online_cpu_nid(int cpu); extern int timed_topology_update(int nsecs); -extern void __init shared_proc_topology_init(void); #else static inline int start_topology_update(void) { @@ -121,9 +120,6 @@ static inline int timed_topology_update(int nsecs) return 0; } -#ifdef CONFIG_SMP -static inline void shared_proc_topology_init(void) {} -#endif #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ #include @@ -134,7 +130,13 @@ static inline void shared_proc_topology_init(void) {} #ifdef CONFIG_PPC64 #include +#ifdef CONFIG_PPC_SPLPAR +int get_physical_package_id(int cpu); +#define topology_physical_package_id(cpu) (get_physical_package_id(cpu)) +#else #define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu)) +#endif + #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index b5e1f8f8a05c24..2ff884853f975c 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h @@ -13,9 +13,6 @@ #define VDSO_VERSION_STRING LINUX_2.6.15 -/* Define if 64 bits VDSO has procedure descriptors */ -#undef VDS64_HAS_DESCRIPTORS - #ifndef __ASSEMBLY__ /* Offsets relative to thread->vdso_base */ @@ -28,25 +25,6 @@ int vdso_getcpu_init(void); #else /* __ASSEMBLY__ */ #ifdef __VDSO64__ -#ifdef VDS64_HAS_DESCRIPTORS -#define V_FUNCTION_BEGIN(name) \ - .globl name; \ - .section ".opd","a"; \ - .align 3; \ - name: \ - .quad .name,.TOC.@tocbase,0; \ - .previous; \ - .globl .name; \ - .type .name,@function; \ - .name: \ - -#define V_FUNCTION_END(name) \ - .size .name,.-.name; - -#define V_LOCAL_FUNC(name) (.name) - -#else /* VDS64_HAS_DESCRIPTORS */ - #define V_FUNCTION_BEGIN(name) \ .globl name; \ name: \ @@ -55,8 +33,6 @@ int vdso_getcpu_init(void); .size name,.-name; #define V_LOCAL_FUNC(name) (name) - -#endif /* VDS64_HAS_DESCRIPTORS */ #endif /* __VDSO64__ */ #ifdef __VDSO32__ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 78a1b22d4fd836..570660efbb3d0e 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -3,8 +3,6 @@ # Makefile for the linux kernel. # -CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' - ifdef CONFIG_PPC64 CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) endif @@ -41,16 +39,17 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING endif -obj-y := cputable.o ptrace.o syscalls.o \ +obj-y := cputable.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ process.o systbl.o idle.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ of_platform.o prom_parse.o -obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ - signal_64.o ptrace32.o \ - paca.o nvram_64.o firmware.o note.o +obj-y += ptrace/ +obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o signal_64.o \ + paca.o nvram_64.o firmware.o note.o \ + syscall_64.o obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 6dfceaa820e42f..f57712a5581571 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -26,7 +26,7 @@ static void scrollscreen(void); #endif -#define __force_data __attribute__((__section__(".data"))) +#define __force_data __section(.data) static int g_loc_X __force_data; static int g_loc_Y __force_data; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 245be4fafe134a..13eba2eb46fe2c 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2198,7 +2198,6 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, */ if (old.oprofile_cpu_type != NULL) { t->oprofile_cpu_type = old.oprofile_cpu_type; - t->oprofile_type = old.oprofile_type; t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } } diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 182b4047c1ef97..36bc0d5c4f3a6c 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -139,7 +139,6 @@ static void __init cpufeatures_setup_cpu(void) /* Initialize the base environment -- clear FSCR/HFSCR. */ hv_mode = !!(mfmsr() & MSR_HV); if (hv_mode) { - /* CPU_FTR_HVMODE is used early in PACA setup */ cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; mtspr(SPRN_HFSCR, 0); } diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 17cb3e9b5697f6..7cdcb413bb44d8 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1107,87 +1107,43 @@ static int eeh_init(void) core_initcall_sync(eeh_init); /** - * eeh_add_device_early - Enable EEH for the indicated device node - * @pdn: PCI device node for which to set up EEH - * - * This routine must be used to perform EEH initialization for PCI - * devices that were added after system boot (e.g. hotplug, dlpar). - * This routine must be called before any i/o is performed to the - * adapter (inluding any config-space i/o). - * Whether this actually enables EEH or not for this device depends - * on the CEC architecture, type of the device, on earlier boot - * command-line arguments & etc. - */ -void eeh_add_device_early(struct pci_dn *pdn) -{ - struct pci_controller *phb = pdn ? pdn->phb : NULL; - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - - if (!edev) - return; - - if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) - return; - - /* USB Bus children of PCI devices will not have BUID's */ - if (NULL == phb || - (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) - return; - - eeh_ops->probe(pdn, NULL); -} - -/** - * eeh_add_device_tree_early - Enable EEH for the indicated device - * @pdn: PCI device node - * - * This routine must be used to perform EEH initialization for the - * indicated PCI device that was added after system boot (e.g. - * hotplug, dlpar). - */ -void eeh_add_device_tree_early(struct pci_dn *pdn) -{ - struct pci_dn *n; - - if (!pdn) - return; - - list_for_each_entry(n, &pdn->child_list, list) - eeh_add_device_tree_early(n); - eeh_add_device_early(pdn); -} -EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); - -/** - * eeh_add_device_late - Perform EEH initialization for the indicated pci device + * eeh_probe_device() - Perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH * * This routine must be used to complete EEH initialization for PCI * devices that were added after system boot (e.g. hotplug, dlpar). */ -void eeh_add_device_late(struct pci_dev *dev) +void eeh_probe_device(struct pci_dev *dev) { - struct pci_dn *pdn; struct eeh_dev *edev; - if (!dev) + pr_debug("EEH: Adding device %s\n", pci_name(dev)); + + /* + * pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was + * already called for this device. + */ + if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) { + pci_dbg(dev, "Already bound to an eeh_dev!\n"); return; + } - pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); - edev = pdn_to_eeh_dev(pdn); - eeh_edev_dbg(edev, "Adding device\n"); - if (edev->pdev == dev) { - eeh_edev_dbg(edev, "Device already referenced!\n"); + edev = eeh_ops->probe(dev); + if (!edev) { + pr_debug("EEH: Adding device failed\n"); return; } /* - * The EEH cache might not be removed correctly because of - * unbalanced kref to the device during unplug time, which - * relies on pcibios_release_device(). So we have to remove - * that here explicitly. + * FIXME: We rely on pcibios_release_device() to remove the + * existing EEH state. The release function is only called if + * the pci_dev's refcount drops to zero so if something is + * keeping a ref to a device (e.g. a filesystem) we need to + * remove the old EEH state. + * + * FIXME: HEY MA, LOOK AT ME, NO LOCKING! */ - if (edev->pdev) { + if (edev->pdev && edev->pdev != dev) { eeh_rmv_from_parent_pe(edev); eeh_addr_cache_rmv_dev(edev->pdev); eeh_sysfs_remove_device(edev->pdev); @@ -1198,68 +1154,15 @@ void eeh_add_device_late(struct pci_dev *dev) * into error handler afterwards. */ edev->mode |= EEH_DEV_NO_HANDLER; - - edev->pdev = NULL; - dev->dev.archdata.edev = NULL; } - if (eeh_has_flag(EEH_PROBE_MODE_DEV)) - eeh_ops->probe(pdn, NULL); - + /* bind the pdev and the edev together */ edev->pdev = dev; dev->dev.archdata.edev = edev; - eeh_addr_cache_insert_dev(dev); + eeh_sysfs_add_device(dev); } -/** - * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus - * @bus: PCI bus - * - * This routine must be used to perform EEH initialization for PCI - * devices which are attached to the indicated PCI bus. The PCI bus - * is added after system boot through hotplug or dlpar. - */ -void eeh_add_device_tree_late(struct pci_bus *bus) -{ - struct pci_dev *dev; - - if (eeh_has_flag(EEH_FORCE_DISABLED)) - return; - list_for_each_entry(dev, &bus->devices, bus_list) { - eeh_add_device_late(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *subbus = dev->subordinate; - if (subbus) - eeh_add_device_tree_late(subbus); - } - } -} -EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); - -/** - * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus - * @bus: PCI bus - * - * This routine must be used to add EEH sysfs files for PCI - * devices which are attached to the indicated PCI bus. The PCI bus - * is added after system boot through hotplug or dlpar. - */ -void eeh_add_sysfs_files(struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - eeh_sysfs_add_device(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *subbus = dev->subordinate; - if (subbus) - eeh_add_sysfs_files(subbus); - } - } -} -EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); - /** * eeh_remove_device - Undo EEH setup for the indicated pci device * @dev: pci device to be removed diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 16af0d8d90a864..a6371fb8f7612c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -246,9 +246,8 @@ reenable_mmu: * r3 can be different from GPR3(r1) at this point, r9 and r11 * contains the old MSR and handler address respectively, * r4 & r5 can contain page fault arguments that need to be passed - * along as well. r12, CCR, CTR, XER etc... are left clobbered as - * they aren't useful past this point (aren't syscall arguments), - * the rest is restored from the exception frame. + * along as well. r0, r6-r8, r12, CCR, CTR, XER etc... are left + * clobbered as they aren't useful past this point. */ stwu r1,-32(r1) @@ -262,16 +261,12 @@ reenable_mmu: * lockdep */ 1: bl trace_hardirqs_off -2: lwz r5,24(r1) + lwz r5,24(r1) lwz r4,20(r1) lwz r3,16(r1) lwz r11,12(r1) lwz r9,8(r1) addi r1,r1,32 - lwz r0,GPR0(r1) - lwz r6,GPR6(r1) - lwz r7,GPR7(r1) - lwz r8,GPR8(r1) mtctr r11 mtlr r9 bctr /* jump to handler */ @@ -575,6 +570,33 @@ syscall_exit_work: bl do_syscall_trace_leave b ret_from_except_full + /* + * System call was called from kernel. We get here with SRR1 in r9. + * Mark the exception as recoverable once we have retrieved SRR0, + * trap a warning and return ENOSYS with CR[SO] set. + */ + .globl ret_from_kernel_syscall +ret_from_kernel_syscall: + mfspr r9, SPRN_SRR0 + mfspr r10, SPRN_SRR1 +#if !defined(CONFIG_4xx) && !defined(CONFIG_BOOKE) + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR|MSR_DR)) + mtmsr r11 +#endif + +0: trap + EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING + + li r3, ENOSYS + crset so +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) + mtspr SPRN_NRI, r0 +#endif + mtspr SPRN_SRR0, r9 + mtspr SPRN_SRR1, r10 + SYNC + RFI + /* * The fork/clone functions need to copy the full register set into * the child process. Therefore we need to save all the nonvolatile diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6ba675b0cf7da9..63f0a4414618a2 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -69,6 +70,7 @@ BEGIN_FTR_SECTION bne .Ltabort_syscall END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif +_ASM_NOKPROBE_SYMBOL(system_call_common) mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) @@ -76,342 +78,122 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) std r12,_MSR(r1) std r0,GPR0(r1) std r10,GPR1(r1) + std r2,GPR2(r1) #ifdef CONFIG_PPC_FSL_BOOK3E START_BTB_FLUSH_SECTION BTB_FLUSH(r10) END_BTB_FLUSH_SECTION #endif - ACCOUNT_CPU_USER_ENTRY(r13, r10, r11) - std r2,GPR2(r1) + ld r2,PACATOC(r13) + mfcr r12 + li r11,0 + /* Can we avoid saving r3-r8 in common case? */ std r3,GPR3(r1) - mfcr r2 std r4,GPR4(r1) std r5,GPR5(r1) std r6,GPR6(r1) std r7,GPR7(r1) std r8,GPR8(r1) - li r11,0 + /* Zero r9-r12, this should only be required when restoring all GPRs */ std r11,GPR9(r1) std r11,GPR10(r1) std r11,GPR11(r1) std r11,GPR12(r1) + std r9,GPR13(r1) + SAVE_NVGPRS(r1) std r11,_XER(r1) std r11,_CTR(r1) - std r9,GPR13(r1) mflr r10 + /* * This clears CR0.SO (bit 28), which is the error indication on * return from this system call. */ - rldimi r2,r11,28,(63-28) - li r11,0xc01 + rldimi r12,r11,28,(63-28) + li r11,0xc00 std r10,_LINK(r1) std r11,_TRAP(r1) + std r12,_CCR(r1) std r3,ORIG_GPR3(r1) - std r2,_CCR(r1) - ld r2,PACATOC(r13) - addi r9,r1,STACK_FRAME_OVERHEAD + addi r10,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) - std r11,-16(r9) /* "regshere" marker */ - - kuap_check_amr r10, r11 - -#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR) -BEGIN_FW_FTR_SECTION - /* see if there are any DTL entries to process */ - ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */ - ld r11,PACA_DTL_RIDX(r13) /* get log read index */ - addi r10,r10,LPPACA_DTLIDX - LDX_BE r10,0,r10 /* get log write index */ - cmpd r11,r10 - beq+ 33f - bl accumulate_stolen_time - REST_GPR(0,r1) - REST_4GPRS(3,r1) - REST_2GPRS(7,r1) - addi r9,r1,STACK_FRAME_OVERHEAD -33: -END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */ - - /* - * A syscall should always be called with interrupts enabled - * so we just unconditionally hard-enable here. When some kind - * of irq tracing is used, we additionally check that condition - * is correct - */ -#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) - lbz r10,PACAIRQSOFTMASK(r13) -1: tdnei r10,IRQS_ENABLED - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING -#endif - -#ifdef CONFIG_PPC_BOOK3E - wrteei 1 -#else - li r11,MSR_RI - ori r11,r11,MSR_EE - mtmsrd r11,1 -#endif /* CONFIG_PPC_BOOK3E */ - -system_call: /* label this so stack traces look sane */ - /* We do need to set SOFTE in the stack frame or the return - * from interrupt will be painful - */ - li r10,IRQS_ENABLED - std r10,SOFTE(r1) - - ld r11, PACA_THREAD_INFO(r13) - ld r10,TI_FLAGS(r11) - andi. r11,r10,_TIF_SYSCALL_DOTRACE - bne .Lsyscall_dotrace /* does not return */ - cmpldi 0,r0,NR_syscalls - bge- .Lsyscall_enosys + std r11,-16(r10) /* "regshere" marker */ -.Lsyscall: -/* - * Need to vector to 32 Bit or default sys_call_table here, - * based on caller's run-mode / personality. - */ - ld r11,SYS_CALL_TABLE@toc(2) - andis. r10,r10,_TIF_32BIT@h - beq 15f - ld r11,COMPAT_SYS_CALL_TABLE@toc(2) - clrldi r3,r3,32 - clrldi r4,r4,32 - clrldi r5,r5,32 - clrldi r6,r6,32 - clrldi r7,r7,32 - clrldi r8,r8,32 -15: - slwi r0,r0,3 - - barrier_nospec_asm /* - * Prevent the load of the handler below (based on the user-passed - * system call number) being speculatively executed until the test - * against NR_syscalls and branch to .Lsyscall_enosys above has - * committed. + * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which + * would clobber syscall parameters. Also we always enter with IRQs + * enabled and nothing pending. system_call_exception() will call + * trace_hardirqs_off(). */ + li r11,IRQS_ALL_DISABLED + li r12,PACA_IRQ_HARD_DIS + stb r11,PACAIRQSOFTMASK(r13) + stb r12,PACAIRQHAPPENED(r13) - ldx r12,r11,r0 /* Fetch system call handler [ptr] */ - mtctr r12 - bctrl /* Call handler */ + /* Calling convention has r9 = orig r0, r10 = regs */ + mr r9,r0 + bl system_call_exception - /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */ .Lsyscall_exit: - std r3,RESULT(r1) - -#ifdef CONFIG_DEBUG_RSEQ - /* Check whether the syscall is issued inside a restartable sequence */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl rseq_syscall - ld r3,RESULT(r1) -#endif - - ld r12, PACA_THREAD_INFO(r13) - - ld r8,_MSR(r1) - -/* - * This is a few instructions into the actual syscall exit path (which actually - * starts at .Lsyscall_exit) to cater to kprobe blacklisting and to reduce the - * number of visible symbols for profiling purposes. - * - * We can probe from system_call until this point as MSR_RI is set. But once it - * is cleared below, we won't be able to take a trap. - * - * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL(). - */ -system_call_exit: - /* - * Disable interrupts so current_thread_info()->flags can't change, - * and so that we don't get interrupted after loading SRR0/1. - * - * Leave MSR_RI enabled for now, because with THREAD_INFO_IN_TASK we - * could fault on the load of the TI_FLAGS below. - */ -#ifdef CONFIG_PPC_BOOK3E - wrteei 0 -#else - li r11,MSR_RI - mtmsrd r11,1 -#endif /* CONFIG_PPC_BOOK3E */ + addi r4,r1,STACK_FRAME_OVERHEAD + bl syscall_exit_prepare - ld r9,TI_FLAGS(r12) - li r11,-MAX_ERRNO - andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) - bne- .Lsyscall_exit_work + ld r2,_CCR(r1) + ld r4,_NIP(r1) + ld r5,_MSR(r1) + ld r6,_LINK(r1) - andi. r0,r8,MSR_FP - beq 2f -#ifdef CONFIG_ALTIVEC - andis. r0,r8,MSR_VEC@h - bne 3f -#endif -2: addi r3,r1,STACK_FRAME_OVERHEAD - bl restore_math - ld r8,_MSR(r1) - ld r3,RESULT(r1) - li r11,-MAX_ERRNO - -3: cmpld r3,r11 - ld r5,_CCR(r1) - bge- .Lsyscall_error -.Lsyscall_error_cont: - ld r7,_NIP(r1) BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - andi. r6,r8,MSR_PR - ld r4,_LINK(r1) - kuap_check_amr r10, r11 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 + mtlr r6 -#ifdef CONFIG_PPC_BOOK3S - /* - * Clear MSR_RI, MSR_EE is already and remains disabled. We could do - * this later, but testing shows that doing it here causes less slow - * down than doing it closer to the rfid. - */ + cmpdi r3,0 + bne .Lsyscall_restore_regs + /* Zero volatile regs that may contain sensitive kernel data */ + li r0,0 + li r4,0 + li r5,0 + li r6,0 + li r7,0 + li r8,0 + li r9,0 + li r10,0 li r11,0 - mtmsrd r11,1 -#endif - - beq- 1f - ACCOUNT_CPU_USER_EXIT(r13, r11, r12) + li r12,0 + mtctr r0 + mtspr SPRN_XER,r0 +.Lsyscall_restore_regs_cont: BEGIN_FTR_SECTION HMT_MEDIUM_LOW END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - std r8, PACATMSCRATCH(r13) -#endif - /* * We don't need to restore AMR on the way back to userspace for KUAP. * The value of AMR only matters while we're in the kernel. */ - ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ + mtcr r2 ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r13,GPR13(r1) ld r1,GPR1(r1) - mtlr r4 - mtcr r5 - mtspr SPRN_SRR0,r7 - mtspr SPRN_SRR1,r8 RFI_TO_USER b . /* prevent speculative execution */ -1: /* exit to kernel */ - kuap_restore_amr r2 - - ld r2,GPR2(r1) - ld r1,GPR1(r1) - mtlr r4 - mtcr r5 - mtspr SPRN_SRR0,r7 - mtspr SPRN_SRR1,r8 - RFI_TO_KERNEL - b . /* prevent speculative execution */ - -.Lsyscall_error: - oris r5,r5,0x1000 /* Set SO bit in CR */ - neg r3,r3 - std r5,_CCR(r1) - b .Lsyscall_error_cont - -/* Traced system call support */ -.Lsyscall_dotrace: - bl save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_syscall_trace_enter - - /* - * We use the return value of do_syscall_trace_enter() as the syscall - * number. If the syscall was rejected for any reason do_syscall_trace_enter() - * returns an invalid syscall number and the test below against - * NR_syscalls will fail. - */ - mr r0,r3 - - /* Restore argument registers just clobbered and/or possibly changed. */ - ld r3,GPR3(r1) - ld r4,GPR4(r1) - ld r5,GPR5(r1) - ld r6,GPR6(r1) - ld r7,GPR7(r1) - ld r8,GPR8(r1) - - /* Repopulate r9 and r10 for the syscall path */ - addi r9,r1,STACK_FRAME_OVERHEAD - ld r10, PACA_THREAD_INFO(r13) - ld r10,TI_FLAGS(r10) - - cmpldi r0,NR_syscalls - blt+ .Lsyscall - - /* Return code is already in r3 thanks to do_syscall_trace_enter() */ - b .Lsyscall_exit - - -.Lsyscall_enosys: - li r3,-ENOSYS - b .Lsyscall_exit - -.Lsyscall_exit_work: - /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr. - If TIF_NOERROR is set, just save r3 as it is. */ - - andi. r0,r9,_TIF_RESTOREALL - beq+ 0f +.Lsyscall_restore_regs: + ld r3,_CTR(r1) + ld r4,_XER(r1) REST_NVGPRS(r1) - b 2f -0: cmpld r3,r11 /* r11 is -MAX_ERRNO */ - blt+ 1f - andi. r0,r9,_TIF_NOERROR - bne- 1f - ld r5,_CCR(r1) - neg r3,r3 - oris r5,r5,0x1000 /* Set SO bit in CR */ - std r5,_CCR(r1) -1: std r3,GPR3(r1) -2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) - beq 4f - - /* Clear per-syscall TIF flags if any are set. */ - - li r11,_TIF_PERSYSCALL_MASK - addi r12,r12,TI_FLAGS -3: ldarx r10,0,r12 - andc r10,r10,r11 - stdcx. r10,0,r12 - bne- 3b - subi r12,r12,TI_FLAGS - -4: /* Anything else left to do? */ -BEGIN_FTR_SECTION - lis r3,DEFAULT_PPR@highest /* Set default PPR */ - sldi r3,r3,32 /* bits 11-13 are used for ppr */ - std r3,_PPR(r1) -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - - andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP) - beq ret_from_except_lite - - /* Re-enable interrupts */ -#ifdef CONFIG_PPC_BOOK3E - wrteei 1 -#else - li r10,MSR_RI - ori r10,r10,MSR_EE - mtmsrd r10,1 -#endif /* CONFIG_PPC_BOOK3E */ - - bl save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_syscall_trace_leave - b ret_from_except + mtctr r3 + mtspr SPRN_XER,r4 + ld r0,GPR0(r1) + REST_8GPRS(4, r1) + ld r12,GPR12(r1) + b .Lsyscall_restore_regs_cont #ifdef CONFIG_PPC_TRANSACTIONAL_MEM .Ltabort_syscall: @@ -439,64 +221,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) RFI_TO_USER b . /* prevent speculative execution */ #endif -_ASM_NOKPROBE_SYMBOL(system_call_common); -_ASM_NOKPROBE_SYMBOL(system_call_exit); - -/* Save non-volatile GPRs, if not already saved. */ -_GLOBAL(save_nvgprs) - ld r11,_TRAP(r1) - andi. r0,r11,1 - beqlr- - SAVE_NVGPRS(r1) - clrrdi r0,r11,1 - std r0,_TRAP(r1) - blr -_ASM_NOKPROBE_SYMBOL(save_nvgprs); - - -/* - * The sigsuspend and rt_sigsuspend system calls can call do_signal - * and thus put the process into the stopped state where we might - * want to examine its user state with ptrace. Therefore we need - * to save all the nonvolatile registers (r14 - r31) before calling - * the C code. Similarly, fork, vfork and clone need the full - * register state on the stack so that it can be copied to the child. - */ - -_GLOBAL(ppc_fork) - bl save_nvgprs - bl sys_fork - b .Lsyscall_exit - -_GLOBAL(ppc_vfork) - bl save_nvgprs - bl sys_vfork - b .Lsyscall_exit - -_GLOBAL(ppc_clone) - bl save_nvgprs - bl sys_clone - b .Lsyscall_exit - -_GLOBAL(ppc_clone3) - bl save_nvgprs - bl sys_clone3 - b .Lsyscall_exit - -_GLOBAL(ppc32_swapcontext) - bl save_nvgprs - bl compat_sys_swapcontext - b .Lsyscall_exit - -_GLOBAL(ppc64_swapcontext) - bl save_nvgprs - bl sys_swapcontext - b .Lsyscall_exit - -_GLOBAL(ppc_switch_endian) - bl save_nvgprs - bl sys_switch_endian - b .Lsyscall_exit _GLOBAL(ret_from_fork) bl schedule_tail @@ -516,6 +240,19 @@ _GLOBAL(ret_from_kernel_thread) li r3,0 b .Lsyscall_exit +#ifdef CONFIG_PPC_BOOK3E +/* Save non-volatile GPRs, if not already saved. */ +_GLOBAL(save_nvgprs) + ld r11,_TRAP(r1) + andi. r0,r11,1 + beqlr- + SAVE_NVGPRS(r1) + clrrdi r0,r11,1 + std r0,_TRAP(r1) + blr +_ASM_NOKPROBE_SYMBOL(save_nvgprs); +#endif + #ifdef CONFIG_PPC_BOOK3S_64 #define FLUSH_COUNT_CACHE \ @@ -578,7 +315,7 @@ flush_count_cache: * state of one is saved on its kernel stack. Then the state * of the other is restored from its kernel stack. The memory * management hardware is updated to the second process's state. - * Finally, we can return to the second process, via ret_from_except. + * Finally, we can return to the second process, via interrupt_return. * On entry, r3 points to the THREAD for the current task, r4 * points to the THREAD for the new task. * @@ -730,408 +467,146 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) addi r1,r1,SWITCH_FRAME_SIZE blr - .align 7 -_GLOBAL(ret_from_except) - ld r11,_TRAP(r1) - andi. r0,r11,1 - bne ret_from_except_lite - REST_NVGPRS(r1) - -_GLOBAL(ret_from_except_lite) - /* - * Disable interrupts so that current_thread_info()->flags - * can't change between when we test it and when we return - * from the interrupt. - */ -#ifdef CONFIG_PPC_BOOK3E - wrteei 0 -#else - li r10,MSR_RI - mtmsrd r10,1 /* Update machine state */ -#endif /* CONFIG_PPC_BOOK3E */ - - ld r9, PACA_THREAD_INFO(r13) - ld r3,_MSR(r1) -#ifdef CONFIG_PPC_BOOK3E - ld r10,PACACURRENT(r13) -#endif /* CONFIG_PPC_BOOK3E */ - ld r4,TI_FLAGS(r9) - andi. r3,r3,MSR_PR - beq resume_kernel -#ifdef CONFIG_PPC_BOOK3E - lwz r3,(THREAD+THREAD_DBCR0)(r10) -#endif /* CONFIG_PPC_BOOK3E */ - - /* Check current_thread_info()->flags */ - andi. r0,r4,_TIF_USER_WORK_MASK - bne 1f -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3S /* - * Check to see if the dbcr0 register is set up to debug. - * Use the internal debug mode bit to do this. - */ - andis. r0,r3,DBCR0_IDM@h - beq restore - mfmsr r0 - rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */ - mtmsr r0 - mtspr SPRN_DBCR0,r3 - li r10, -1 - mtspr SPRN_DBSR,r10 - b restore -#else + * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not + * touched, AMR not set, no exit work created, then this can be used. + */ + .balign IFETCH_ALIGN_BYTES + .globl fast_interrupt_return +fast_interrupt_return: +_ASM_NOKPROBE_SYMBOL(fast_interrupt_return) + ld r4,_MSR(r1) + andi. r0,r4,MSR_PR + bne .Lfast_user_interrupt_return + andi. r0,r4,MSR_RI + li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ + bne+ .Lfast_kernel_interrupt_return addi r3,r1,STACK_FRAME_OVERHEAD - bl restore_math - b restore -#endif -1: andi. r0,r4,_TIF_NEED_RESCHED - beq 2f - bl restore_interrupts - SCHEDULE_USER - b ret_from_except_lite -2: -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - andi. r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM - bne 3f /* only restore TM if nothing else to do */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl restore_tm_state - b restore -3: -#endif - bl save_nvgprs - /* - * Use a non volatile GPR to save and restore our thread_info flags - * across the call to restore_interrupts. - */ - mr r30,r4 - bl restore_interrupts - mr r4,r30 + bl unrecoverable_exception + b . /* should not get here */ + + .balign IFETCH_ALIGN_BYTES + .globl interrupt_return +interrupt_return: +_ASM_NOKPROBE_SYMBOL(interrupt_return) + ld r4,_MSR(r1) + andi. r0,r4,MSR_PR + beq .Lkernel_interrupt_return addi r3,r1,STACK_FRAME_OVERHEAD - bl do_notify_resume - b ret_from_except - -resume_kernel: - /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ - andis. r8,r4,_TIF_EMULATE_STACK_STORE@h - beq+ 1f + bl interrupt_exit_user_prepare + cmpdi r3,0 + bne- .Lrestore_nvgprs - addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ +.Lfast_user_interrupt_return: + ld r11,_NIP(r1) + ld r12,_MSR(r1) +BEGIN_FTR_SECTION + ld r10,_PPR(r1) + mtspr SPRN_PPR,r10 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 - ld r3,GPR1(r1) - subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ - mr r4,r1 /* src: current exception frame */ - mr r1,r3 /* Reroute the trampoline frame to r1 */ +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - /* Copy from the original to the trampoline. */ - li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */ - li r6,0 /* start offset: 0 */ - mtctr r5 -2: ldx r0,r6,r4 - stdx r0,r6,r3 - addi r6,r6,8 - bdnz 2b - - /* Do real store operation to complete stdu */ - ld r5,GPR1(r1) - std r8,0(r5) - - /* Clear _TIF_EMULATE_STACK_STORE flag */ - lis r11,_TIF_EMULATE_STACK_STORE@h - addi r5,r9,TI_FLAGS -0: ldarx r4,0,r5 - andc r4,r4,r11 - stdcx. r4,0,r5 - bne- 0b -1: - -#ifdef CONFIG_PREEMPTION - /* Check if we need to preempt */ - andi. r0,r4,_TIF_NEED_RESCHED - beq+ restore - /* Check that preempt_count() == 0 and interrupts are enabled */ - lwz r8,TI_PREEMPT(r9) - cmpwi cr0,r8,0 - bne restore - ld r0,SOFTE(r1) - andi. r0,r0,IRQS_DISABLED - bne restore + ld r3,_CCR(r1) + ld r4,_LINK(r1) + ld r5,_CTR(r1) + ld r6,_XER(r1) + li r0,0 - /* - * Here we are preempting the current task. We want to make - * sure we are soft-disabled first and reconcile irq state. - */ - RECONCILE_IRQ_STATE(r3,r4) - bl preempt_schedule_irq + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) + REST_GPR(13, r1) - /* - * arch_local_irq_restore() from preempt_schedule_irq above may - * enable hard interrupt but we really should disable interrupts - * when we return from the interrupt, and so that we don't get - * interrupted after loading SRR0/1. - */ -#ifdef CONFIG_PPC_BOOK3E - wrteei 0 -#else - li r10,MSR_RI - mtmsrd r10,1 /* Update machine state */ -#endif /* CONFIG_PPC_BOOK3E */ -#endif /* CONFIG_PREEMPTION */ + mtcr r3 + mtlr r4 + mtctr r5 + mtspr SPRN_XER,r6 - .globl fast_exc_return_irq -fast_exc_return_irq: -restore: - /* - * This is the main kernel exit path. First we check if we - * are about to re-enable interrupts - */ - ld r5,SOFTE(r1) - lbz r6,PACAIRQSOFTMASK(r13) - andi. r5,r5,IRQS_DISABLED - bne .Lrestore_irq_off + REST_4GPRS(2, r1) + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + RFI_TO_USER + b . /* prevent speculative execution */ - /* We are enabling, were we already enabled ? Yes, just return */ - andi. r6,r6,IRQS_DISABLED - beq cr0,.Ldo_restore +.Lrestore_nvgprs: + REST_NVGPRS(r1) + b .Lfast_user_interrupt_return - /* - * We are about to soft-enable interrupts (we are hard disabled - * at this point). We check if there's anything that needs to - * be replayed first. - */ - lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 - bne- .Lrestore_check_irq_replay + .balign IFETCH_ALIGN_BYTES +.Lkernel_interrupt_return: + addi r3,r1,STACK_FRAME_OVERHEAD + bl interrupt_exit_kernel_prepare - /* - * Get here when nothing happened while soft-disabled, just - * soft-enable and move-on. We will hard-enable as a side - * effect of rfi - */ -.Lrestore_no_replay: - TRACE_ENABLE_INTS - li r0,IRQS_ENABLED - stb r0,PACAIRQSOFTMASK(r13); +.Lfast_kernel_interrupt_return: + cmpdi cr1,r3,0 + ld r11,_NIP(r1) + ld r12,_MSR(r1) + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 - /* - * Final return path. BookE is handled in a different file - */ -.Ldo_restore: -#ifdef CONFIG_PPC_BOOK3E - b exception_return_book3e -#else - /* - * Clear the reservation. If we know the CPU tracks the address of - * the reservation then we can potentially save some cycles and use - * a larx. On POWER6 and POWER7 this is significantly faster. - */ BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ FTR_SECTION_ELSE - ldarx r4,0,r1 + ldarx r0,0,r1 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - /* - * Some code path such as load_up_fpu or altivec return directly - * here. They run entirely hard disabled and do not alter the - * interrupt state. They also don't use lwarx/stwcx. and thus - * are known not to leave dangling reservations. - */ - .globl fast_exception_return -fast_exception_return: - ld r3,_MSR(r1) + ld r3,_LINK(r1) ld r4,_CTR(r1) - ld r0,_LINK(r1) - mtctr r4 - mtlr r0 - ld r4,_XER(r1) - mtspr SPRN_XER,r4 - - kuap_check_amr r5, r6 - - REST_8GPRS(5, r1) - - andi. r0,r3,MSR_RI - beq- .Lunrecov_restore - - /* - * Clear RI before restoring r13. If we are returning to - * userspace and we take an exception after restoring r13, - * we end up corrupting the userspace r13 value. - */ - li r4,0 - mtmsrd r4,1 - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* TM debug */ - std r3, PACATMSCRATCH(r13) /* Stash returned-to MSR */ -#endif - /* - * r13 is our per cpu area, only restore it if we are returning to - * userspace the value stored in the stack frame may belong to - * another CPU. - */ - andi. r0,r3,MSR_PR - beq 1f -BEGIN_FTR_SECTION - /* Restore PPR */ - ld r2,_PPR(r1) - mtspr SPRN_PPR,r2 -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - ACCOUNT_CPU_USER_EXIT(r13, r2, r4) - REST_GPR(13, r1) - - /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * The value of AMR only matters while we're in the kernel. - */ - mtspr SPRN_SRR1,r3 - - ld r2,_CCR(r1) - mtcrf 0xFF,r2 - ld r2,_NIP(r1) - mtspr SPRN_SRR0,r2 - - ld r0,GPR0(r1) - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r4,GPR4(r1) - ld r1,GPR1(r1) - RFI_TO_USER - b . /* prevent speculative execution */ + ld r5,_XER(r1) + ld r6,_CCR(r1) + li r0,0 -1: mtspr SPRN_SRR1,r3 + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) - ld r2,_CCR(r1) - mtcrf 0xFF,r2 - ld r2,_NIP(r1) - mtspr SPRN_SRR0,r2 + mtlr r3 + mtctr r4 + mtspr SPRN_XER,r5 /* * Leaving a stale exception_marker on the stack can confuse * the reliable stack unwinder later on. Clear it. */ - li r2,0 - std r2,STACK_FRAME_OVERHEAD-16(r1) - - ld r0,GPR0(r1) - ld r2,GPR2(r1) - ld r3,GPR3(r1) + std r0,STACK_FRAME_OVERHEAD-16(r1) - kuap_restore_amr r4 + REST_4GPRS(2, r1) - ld r4,GPR4(r1) - ld r1,GPR1(r1) + bne- cr1,1f /* emulate stack store */ + mtcr r6 + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) RFI_TO_KERNEL b . /* prevent speculative execution */ -#endif /* CONFIG_PPC_BOOK3E */ - - /* - * We are returning to a context with interrupts soft disabled. - * - * However, we may also about to hard enable, so we need to - * make sure that in this case, we also clear PACA_IRQ_HARD_DIS - * or that bit can get out of sync and bad things will happen - */ -.Lrestore_irq_off: - ld r3,_MSR(r1) - lbz r7,PACAIRQHAPPENED(r13) - andi. r0,r3,MSR_EE - beq 1f - rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS - stb r7,PACAIRQHAPPENED(r13) -1: -#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) - /* The interrupt should not have soft enabled. */ - lbz r7,PACAIRQSOFTMASK(r13) -1: tdeqi r7,IRQS_ENABLED - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING -#endif - b .Ldo_restore - - /* - * Something did happen, check if a re-emit is needed - * (this also clears paca->irq_happened) - */ -.Lrestore_check_irq_replay: - /* XXX: We could implement a fast path here where we check - * for irq_happened being just 0x01, in which case we can - * clear it and return. That means that we would potentially - * miss a decrementer having wrapped all the way around. - * - * Still, this might be useful for things like hash_page - */ - bl __check_irq_replay - cmpwi cr0,r3,0 - beq .Lrestore_no_replay - - /* - * We need to re-emit an interrupt. We do so by re-using our - * existing exception frame. We first change the trap value, - * but we need to ensure we preserve the low nibble of it - */ - ld r4,_TRAP(r1) - clrldi r4,r4,60 - or r4,r4,r3 - std r4,_TRAP(r1) - - /* - * PACA_IRQ_HARD_DIS won't always be set here, so set it now - * to reconcile the IRQ state. Tracing is already accounted for. - */ - lbz r4,PACAIRQHAPPENED(r13) - ori r4,r4,PACA_IRQ_HARD_DIS - stb r4,PACAIRQHAPPENED(r13) - - /* - * Then find the right handler and call it. Interrupts are - * still soft-disabled and we keep them that way. - */ - cmpwi cr0,r3,0x500 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl do_IRQ - b ret_from_except -1: cmpwi cr0,r3,0xf00 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl performance_monitor_exception - b ret_from_except -1: cmpwi cr0,r3,0xe60 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl handle_hmi_exception - b ret_from_except -1: cmpwi cr0,r3,0x900 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl timer_interrupt - b ret_from_except -#ifdef CONFIG_PPC_DOORBELL -1: -#ifdef CONFIG_PPC_BOOK3E - cmpwi cr0,r3,0x280 -#else - cmpwi cr0,r3,0xa00 -#endif /* CONFIG_PPC_BOOK3E */ - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl doorbell_exception -#endif /* CONFIG_PPC_DOORBELL */ -1: b ret_from_except /* What else to do here ? */ - -.Lunrecov_restore: - addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b .Lunrecov_restore - -_ASM_NOKPROBE_SYMBOL(ret_from_except); -_ASM_NOKPROBE_SYMBOL(ret_from_except_lite); -_ASM_NOKPROBE_SYMBOL(resume_kernel); -_ASM_NOKPROBE_SYMBOL(fast_exc_return_irq); -_ASM_NOKPROBE_SYMBOL(restore); -_ASM_NOKPROBE_SYMBOL(fast_exception_return); +1: /* + * Emulate stack store with update. New r1 value was already calculated + * and updated in our interrupt regs by emulate_loadstore, but we can't + * store the previous value of r1 to the stack before re-loading our + * registers from it, otherwise they could be clobbered. Use + * PACA_EXGEN as temporary storage to hold the store data, as + * interrupts are disabled here so it won't be clobbered. + */ + mtcr r6 + std r9,PACA_EXGEN+0(r13) + addi r9,r1,INT_FRAME_SIZE /* get original r1 */ + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + std r9,0(r1) /* perform store component of stdu */ + ld r9,PACA_EXGEN+0(r13) + RFI_TO_KERNEL + b . /* prevent speculative execution */ +#endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_RTAS /* diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index e4076e3c072db3..d9ed79415100f9 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -24,6 +24,7 @@ #include #include #include +#include /* XXX This will ultimately add space for a special exception save * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc... @@ -1002,38 +1003,6 @@ masked_interrupt_book3e_0x280: masked_interrupt_book3e_0x2c0: masked_interrupt_book3e PACA_IRQ_DBELL 0 -/* - * Called from arch_local_irq_enable when an interrupt needs - * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280 - * to indicate the kind of interrupt. MSR:EE is already off. - * We generate a stackframe like if a real interrupt had happened. - * - * Note: While MSR:EE is off, we need to make sure that _MSR - * in the generated frame has EE set to 1 or the exception - * handler will not properly re-enable them. - */ -_GLOBAL(__replay_interrupt) - /* We are going to jump to the exception common code which - * will retrieve various register values from the PACA which - * we don't give a damn about. - */ - mflr r10 - mfmsr r11 - mfcr r4 - mtspr SPRN_SPRG_GEN_SCRATCH,r13; - std r1,PACA_EXGEN+EX_R1(r13); - stw r4,PACA_EXGEN+EX_CR(r13); - ori r11,r11,MSR_EE - subi r1,r1,INT_FRAME_SIZE; - cmpwi cr0,r3,0x500 - beq exc_0x500_common - cmpwi cr0,r3,0x900 - beq exc_0x900_common - cmpwi cr0,r3,0x280 - beq exc_0x280_common - blr - - /* * This is called from 0x300 and 0x400 handlers after the prologs with * r14 and r15 containing the fault address and error code, with the @@ -1073,17 +1042,161 @@ alignment_more: bl alignment_exception b ret_from_except -/* - * We branch here from entry_64.S for the last stage of the exception - * return code path. MSR:EE is expected to be off at that point - */ -_GLOBAL(exception_return_book3e) - b 1f + .align 7 +_GLOBAL(ret_from_except) + ld r11,_TRAP(r1) + andi. r0,r11,1 + bne ret_from_except_lite + REST_NVGPRS(r1) + +_GLOBAL(ret_from_except_lite) + /* + * Disable interrupts so that current_thread_info()->flags + * can't change between when we test it and when we return + * from the interrupt. + */ + wrteei 0 + + ld r9, PACA_THREAD_INFO(r13) + ld r3,_MSR(r1) + ld r10,PACACURRENT(r13) + ld r4,TI_FLAGS(r9) + andi. r3,r3,MSR_PR + beq resume_kernel + lwz r3,(THREAD+THREAD_DBCR0)(r10) + + /* Check current_thread_info()->flags */ + andi. r0,r4,_TIF_USER_WORK_MASK + bne 1f + /* + * Check to see if the dbcr0 register is set up to debug. + * Use the internal debug mode bit to do this. + */ + andis. r0,r3,DBCR0_IDM@h + beq restore + mfmsr r0 + rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */ + mtmsr r0 + mtspr SPRN_DBCR0,r3 + li r10, -1 + mtspr SPRN_DBSR,r10 + b restore +1: andi. r0,r4,_TIF_NEED_RESCHED + beq 2f + bl restore_interrupts + SCHEDULE_USER + b ret_from_except_lite +2: + bl save_nvgprs + /* + * Use a non volatile GPR to save and restore our thread_info flags + * across the call to restore_interrupts. + */ + mr r30,r4 + bl restore_interrupts + mr r4,r30 + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_notify_resume + b ret_from_except + +resume_kernel: + /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ + andis. r8,r4,_TIF_EMULATE_STACK_STORE@h + beq+ 1f + + addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ + + ld r3,GPR1(r1) + subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ + mr r4,r1 /* src: current exception frame */ + mr r1,r3 /* Reroute the trampoline frame to r1 */ + + /* Copy from the original to the trampoline. */ + li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */ + li r6,0 /* start offset: 0 */ + mtctr r5 +2: ldx r0,r6,r4 + stdx r0,r6,r3 + addi r6,r6,8 + bdnz 2b + + /* Do real store operation to complete stdu */ + ld r5,GPR1(r1) + std r8,0(r5) + + /* Clear _TIF_EMULATE_STACK_STORE flag */ + lis r11,_TIF_EMULATE_STACK_STORE@h + addi r5,r9,TI_FLAGS +0: ldarx r4,0,r5 + andc r4,r4,r11 + stdcx. r4,0,r5 + bne- 0b +1: + +#ifdef CONFIG_PREEMPT + /* Check if we need to preempt */ + andi. r0,r4,_TIF_NEED_RESCHED + beq+ restore + /* Check that preempt_count() == 0 and interrupts are enabled */ + lwz r8,TI_PREEMPT(r9) + cmpwi cr0,r8,0 + bne restore + ld r0,SOFTE(r1) + andi. r0,r0,IRQS_DISABLED + bne restore + + /* + * Here we are preempting the current task. We want to make + * sure we are soft-disabled first and reconcile irq state. + */ + RECONCILE_IRQ_STATE(r3,r4) + bl preempt_schedule_irq + + /* + * arch_local_irq_restore() from preempt_schedule_irq above may + * enable hard interrupt but we really should disable interrupts + * when we return from the interrupt, and so that we don't get + * interrupted after loading SRR0/1. + */ + wrteei 0 +#endif /* CONFIG_PREEMPT */ + +restore: + /* + * This is the main kernel exit path. First we check if we + * are about to re-enable interrupts + */ + ld r5,SOFTE(r1) + lbz r6,PACAIRQSOFTMASK(r13) + andi. r5,r5,IRQS_DISABLED + bne .Lrestore_irq_off + + /* We are enabling, were we already enabled ? Yes, just return */ + andi. r6,r6,IRQS_DISABLED + beq cr0,fast_exception_return + + /* + * We are about to soft-enable interrupts (we are hard disabled + * at this point). We check if there's anything that needs to + * be replayed first. + */ + lbz r0,PACAIRQHAPPENED(r13) + cmpwi cr0,r0,0 + bne- .Lrestore_check_irq_replay + + /* + * Get here when nothing happened while soft-disabled, just + * soft-enable and move-on. We will hard-enable as a side + * effect of rfi + */ +.Lrestore_no_replay: + TRACE_ENABLE_INTS + li r0,IRQS_ENABLED + stb r0,PACAIRQSOFTMASK(r13); /* This is the return from load_up_fpu fast path which could do with * less GPR restores in fact, but for now we have a single return path */ - .globl fast_exception_return fast_exception_return: wrteei 0 1: mr r0,r13 @@ -1124,6 +1237,102 @@ fast_exception_return: mfspr r13,SPRN_SPRG_GEN_SCRATCH rfi + /* + * We are returning to a context with interrupts soft disabled. + * + * However, we may also about to hard enable, so we need to + * make sure that in this case, we also clear PACA_IRQ_HARD_DIS + * or that bit can get out of sync and bad things will happen + */ +.Lrestore_irq_off: + ld r3,_MSR(r1) + lbz r7,PACAIRQHAPPENED(r13) + andi. r0,r3,MSR_EE + beq 1f + rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS + stb r7,PACAIRQHAPPENED(r13) +1: +#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) + /* The interrupt should not have soft enabled. */ + lbz r7,PACAIRQSOFTMASK(r13) +1: tdeqi r7,IRQS_ENABLED + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING +#endif + b fast_exception_return + + /* + * Something did happen, check if a re-emit is needed + * (this also clears paca->irq_happened) + */ +.Lrestore_check_irq_replay: + /* XXX: We could implement a fast path here where we check + * for irq_happened being just 0x01, in which case we can + * clear it and return. That means that we would potentially + * miss a decrementer having wrapped all the way around. + * + * Still, this might be useful for things like hash_page + */ + bl __check_irq_replay + cmpwi cr0,r3,0 + beq .Lrestore_no_replay + + /* + * We need to re-emit an interrupt. We do so by re-using our + * existing exception frame. We first change the trap value, + * but we need to ensure we preserve the low nibble of it + */ + ld r4,_TRAP(r1) + clrldi r4,r4,60 + or r4,r4,r3 + std r4,_TRAP(r1) + + /* + * PACA_IRQ_HARD_DIS won't always be set here, so set it now + * to reconcile the IRQ state. Tracing is already accounted for. + */ + lbz r4,PACAIRQHAPPENED(r13) + ori r4,r4,PACA_IRQ_HARD_DIS + stb r4,PACAIRQHAPPENED(r13) + + /* + * Then find the right handler and call it. Interrupts are + * still soft-disabled and we keep them that way. + */ + cmpwi cr0,r3,0x500 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl do_IRQ + b ret_from_except +1: cmpwi cr0,r3,0xf00 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl performance_monitor_exception + b ret_from_except +1: cmpwi cr0,r3,0xe60 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl handle_hmi_exception + b ret_from_except +1: cmpwi cr0,r3,0x900 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl timer_interrupt + b ret_from_except +#ifdef CONFIG_PPC_DOORBELL +1: + cmpwi cr0,r3,0x280 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl doorbell_exception +#endif /* CONFIG_PPC_DOORBELL */ +1: b ret_from_except /* What else to do here ? */ + +_ASM_NOKPROBE_SYMBOL(ret_from_except); +_ASM_NOKPROBE_SYMBOL(ret_from_except_lite); +_ASM_NOKPROBE_SYMBOL(resume_kernel); +_ASM_NOKPROBE_SYMBOL(restore); +_ASM_NOKPROBE_SYMBOL(fast_exception_return); + /* * Trampolines used when spotting a bad kernel stack pointer in * the exception entry code. diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ffc15f4f079ddb..18bbce143084b1 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -32,16 +32,10 @@ #define EX_CCR 52 #define EX_CFAR 56 #define EX_PPR 64 -#if defined(CONFIG_RELOCATABLE) #define EX_CTR 72 .if EX_SIZE != 10 .error "EX_SIZE is wrong" .endif -#else -.if EX_SIZE != 9 - .error "EX_SIZE is wrong" -.endif -#endif /* * Following are fixed section helper macros. @@ -50,7 +44,6 @@ * EXC_VIRT_BEGIN/END - virt (AIL), unrelocated exception vectors * TRAMP_REAL_BEGIN - real, unrelocated helpers (virt may call these) * TRAMP_VIRT_BEGIN - virt, unreloc helpers (in practice, real can use) - * TRAMP_KVM_BEGIN - KVM handlers, these are put into real, unrelocated * EXC_COMMON - After switching to virtual, relocated mode. */ @@ -80,13 +73,6 @@ name: #define TRAMP_VIRT_BEGIN(name) \ FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name) -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER -#define TRAMP_KVM_BEGIN(name) \ - TRAMP_VIRT_BEGIN(name) -#else -#define TRAMP_KVM_BEGIN(name) -#endif - #define EXC_REAL_NONE(start, size) \ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size) @@ -119,67 +105,6 @@ name: ori reg,reg,(ABS_ADDR(label))@l; \ addis reg,reg,(ABS_ADDR(label))@h -/* Exception register prefixes */ -#define EXC_HV_OR_STD 2 /* depends on HVMODE */ -#define EXC_HV 1 -#define EXC_STD 0 - -#if defined(CONFIG_RELOCATABLE) -/* - * If we support interrupts with relocation on AND we're a relocatable kernel, - * we need to use CTR to get to the 2nd level handler. So, save/restore it - * when required. - */ -#define SAVE_CTR(reg, area) mfctr reg ; std reg,area+EX_CTR(r13) -#define GET_CTR(reg, area) ld reg,area+EX_CTR(r13) -#define RESTORE_CTR(reg, area) ld reg,area+EX_CTR(r13) ; mtctr reg -#else -/* ...else CTR is unused and in register. */ -#define SAVE_CTR(reg, area) -#define GET_CTR(reg, area) mfctr reg -#define RESTORE_CTR(reg, area) -#endif - -/* - * PPR save/restore macros used in exceptions-64s.S - * Used for P7 or later processors - */ -#define SAVE_PPR(area, ra) \ -BEGIN_FTR_SECTION_NESTED(940) \ - ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \ - std ra,_PPR(r1); \ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940) - -#define RESTORE_PPR_PACA(area, ra) \ -BEGIN_FTR_SECTION_NESTED(941) \ - ld ra,area+EX_PPR(r13); \ - mtspr SPRN_PPR,ra; \ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941) - -/* - * Get an SPR into a register if the CPU has the given feature - */ -#define OPT_GET_SPR(ra, spr, ftr) \ -BEGIN_FTR_SECTION_NESTED(943) \ - mfspr ra,spr; \ -END_FTR_SECTION_NESTED(ftr,ftr,943) - -/* - * Set an SPR from a register if the CPU has the given feature - */ -#define OPT_SET_SPR(ra, spr, ftr) \ -BEGIN_FTR_SECTION_NESTED(943) \ - mtspr spr,ra; \ -END_FTR_SECTION_NESTED(ftr,ftr,943) - -/* - * Save a register to the PACA if the CPU has the given feature - */ -#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \ -BEGIN_FTR_SECTION_NESTED(943) \ - std ra,offset(r13); \ -END_FTR_SECTION_NESTED(ftr,ftr,943) - /* * Branch to label using its 0xC000 address. This results in instruction * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned @@ -193,89 +118,199 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtctr reg; \ bctr -.macro INT_KVM_HANDLER name, vec, hsrr, area, skip - TRAMP_KVM_BEGIN(\name\()_kvm) - KVM_HANDLER \vec, \hsrr, \area, \skip +/* + * Interrupt code generation macros + */ +#define IVEC .L_IVEC_\name\() /* Interrupt vector address */ +#define IHSRR .L_IHSRR_\name\() /* Sets SRR or HSRR registers */ +#define IHSRR_IF_HVMODE .L_IHSRR_IF_HVMODE_\name\() /* HSRR if HV else SRR */ +#define IAREA .L_IAREA_\name\() /* PACA save area */ +#define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */ +#define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */ +#define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */ +#define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */ +#define ISET_RI .L_ISET_RI_\name\() /* Run common code w/ MSR[RI]=1 */ +#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */ +#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */ +#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */ +#define IKVM_SKIP .L_IKVM_SKIP_\name\() /* Generate KVM skip handler */ +#define IKVM_REAL .L_IKVM_REAL_\name\() /* Real entry tests KVM */ +#define __IKVM_REAL(name) .L_IKVM_REAL_ ## name +#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */ +#define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */ +#define __ISTACK(name) .L_ISTACK_ ## name +#define IRECONCILE .L_IRECONCILE_\name\() /* Do RECONCILE_IRQ_STATE */ +#define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */ + +#define INT_DEFINE_BEGIN(n) \ +.macro int_define_ ## n name + +#define INT_DEFINE_END(n) \ +.endm ; \ +int_define_ ## n n ; \ +do_define_int n + +.macro do_define_int name + .ifndef IVEC + .error "IVEC not defined" + .endif + .ifndef IHSRR + IHSRR=0 + .endif + .ifndef IHSRR_IF_HVMODE + IHSRR_IF_HVMODE=0 + .endif + .ifndef IAREA + IAREA=PACA_EXGEN + .endif + .ifndef IVIRT + IVIRT=1 + .endif + .ifndef IISIDE + IISIDE=0 + .endif + .ifndef IDAR + IDAR=0 + .endif + .ifndef IDSISR + IDSISR=0 + .endif + .ifndef ISET_RI + ISET_RI=1 + .endif + .ifndef IBRANCH_TO_COMMON + IBRANCH_TO_COMMON=1 + .endif + .ifndef IREALMODE_COMMON + IREALMODE_COMMON=0 + .else + .if ! IBRANCH_TO_COMMON + .error "IREALMODE_COMMON=1 but IBRANCH_TO_COMMON=0" + .endif + .endif + .ifndef IMASK + IMASK=0 + .endif + .ifndef IKVM_SKIP + IKVM_SKIP=0 + .endif + .ifndef IKVM_REAL + IKVM_REAL=0 + .endif + .ifndef IKVM_VIRT + IKVM_VIRT=0 + .endif + .ifndef ISTACK + ISTACK=1 + .endif + .ifndef IRECONCILE + IRECONCILE=1 + .endif + .ifndef IKUAP + IKUAP=1 + .endif .endm #ifdef CONFIG_KVM_BOOK3S_64_HANDLER #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* - * If hv is possible, interrupts come into to the hv version - * of the kvmppc_interrupt code, which then jumps to the PR handler, - * kvmppc_interrupt_pr, if the guest is a PR guest. + * All interrupts which set HSRR registers, as well as SRESET and MCE and + * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken, + * so they all generally need to test whether they were taken in guest context. + * + * Note: SRESET and MCE may also be sent to the guest by the hypervisor, and be + * taken with MSR[HV]=0. + * + * Interrupts which set SRR registers (with the above exceptions) do not + * elevate to MSR[HV]=1 mode, though most can be taken when running with + * MSR[HV]=1 (e.g., bare metal kernel and userspace). So these interrupts do + * not need to test whether a guest is running because they get delivered to + * the guest directly, including nested HV KVM guests. + * + * The exception is PR KVM, where the guest runs with MSR[PR]=1 and the host + * runs with MSR[HV]=0, so the host takes all interrupts on behalf of the + * guest. PR KVM runs with LPCR[AIL]=0 which causes interrupts to always be + * delivered to the real-mode entry point, therefore such interrupts only test + * KVM in their real mode handlers, and only when PR KVM is possible. + * + * Interrupts that are taken in MSR[HV]=0 and escalate to MSR[HV]=1 are always + * delivered in real-mode when the MMU is in hash mode because the MMU + * registers are not set appropriately to translate host addresses. In nested + * radix mode these can be delivered in virt-mode as the host translations are + * used implicitly (see: effective LPID, effective PID). + */ + +/* + * If an interrupt is taken while a guest is running, it is immediately routed + * to KVM to handle. If both HV and PR KVM arepossible, KVM interrupts go first + * to kvmppc_interrupt_hv, which handles the PR guest case. */ #define kvmppc_interrupt kvmppc_interrupt_hv #else #define kvmppc_interrupt kvmppc_interrupt_pr #endif -.macro KVMTEST name, hsrr, n +.macro KVMTEST name lbz r10,HSTATE_IN_GUEST(r13) cmpwi r10,0 bne \name\()_kvm .endm -.macro KVM_HANDLER vec, hsrr, area, skip - .if \skip +.macro GEN_KVM name + .balign IFETCH_ALIGN_BYTES +\name\()_kvm: + + .if IKVM_SKIP cmpwi r10,KVM_GUEST_MODE_SKIP beq 89f .else -BEGIN_FTR_SECTION_NESTED(947) - ld r10,\area+EX_CFAR(r13) +BEGIN_FTR_SECTION + ld r10,IAREA+EX_CFAR(r13) std r10,HSTATE_CFAR(r13) -END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .endif -BEGIN_FTR_SECTION_NESTED(948) - ld r10,\area+EX_PPR(r13) + ld r10,PACA_EXGEN+EX_CTR(r13) + mtctr r10 +BEGIN_FTR_SECTION + ld r10,IAREA+EX_PPR(r13) std r10,HSTATE_PPR(r13) -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) - ld r10,\area+EX_R10(r13) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + ld r11,IAREA+EX_R11(r13) + ld r12,IAREA+EX_R12(r13) std r12,HSTATE_SCRATCH0(r13) sldi r12,r9,32 + ld r9,IAREA+EX_R9(r13) + ld r10,IAREA+EX_R10(r13) /* HSRR variants have the 0x2 bit added to their trap number */ - .if \hsrr == EXC_HV_OR_STD + .if IHSRR_IF_HVMODE BEGIN_FTR_SECTION - ori r12,r12,(\vec + 0x2) + ori r12,r12,(IVEC + 0x2) FTR_SECTION_ELSE - ori r12,r12,(\vec) + ori r12,r12,(IVEC) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr - ori r12,r12,(\vec + 0x2) + .elseif IHSRR + ori r12,r12,(IVEC+ 0x2) .else - ori r12,r12,(\vec) + ori r12,r12,(IVEC) .endif - -#ifdef CONFIG_RELOCATABLE - /* - * KVM requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives - * outside the head section. CONFIG_RELOCATABLE KVM expects CTR - * to be saved in HSTATE_SCRATCH1. - */ - mfctr r9 - std r9,HSTATE_SCRATCH1(r13) - __LOAD_FAR_HANDLER(r9, kvmppc_interrupt) - mtctr r9 - ld r9,\area+EX_R9(r13) - bctr -#else - ld r9,\area+EX_R9(r13) b kvmppc_interrupt -#endif - - .if \skip + .if IKVM_SKIP 89: mtocrf 0x80,r9 - ld r9,\area+EX_R9(r13) - ld r10,\area+EX_R10(r13) - .if \hsrr == EXC_HV_OR_STD + ld r10,PACA_EXGEN+EX_CTR(r13) + mtctr r10 + ld r9,IAREA+EX_R9(r13) + ld r10,IAREA+EX_R10(r13) + ld r11,IAREA+EX_R11(r13) + ld r12,IAREA+EX_R12(r13) + .if IHSRR_IF_HVMODE BEGIN_FTR_SECTION b kvmppc_skip_Hinterrupt FTR_SECTION_ELSE b kvmppc_skip_interrupt ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr + .elseif IHSRR b kvmppc_skip_Hinterrupt .else b kvmppc_skip_interrupt @@ -284,107 +319,12 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) .endm #else -.macro KVMTEST name, hsrr, n +.macro KVMTEST name .endm -.macro KVM_HANDLER name, vec, hsrr, area, skip +.macro GEN_KVM name .endm #endif -.macro INT_SAVE_SRR_AND_JUMP label, hsrr, set_ri - ld r10,PACAKMSR(r13) /* get MSR value for kernel */ - .if ! \set_ri - xori r10,r10,MSR_RI /* Clear MSR_RI */ - .endif - .if \hsrr == EXC_HV_OR_STD - BEGIN_FTR_SECTION - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - mtspr SPRN_HSRR1,r10 - FTR_SECTION_ELSE - mfspr r11,SPRN_SRR0 /* save SRR0 */ - mfspr r12,SPRN_SRR1 /* and SRR1 */ - mtspr SPRN_SRR1,r10 - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - mtspr SPRN_HSRR1,r10 - .else - mfspr r11,SPRN_SRR0 /* save SRR0 */ - mfspr r12,SPRN_SRR1 /* and SRR1 */ - mtspr SPRN_SRR1,r10 - .endif - LOAD_HANDLER(r10, \label\()) - .if \hsrr == EXC_HV_OR_STD - BEGIN_FTR_SECTION - mtspr SPRN_HSRR0,r10 - HRFI_TO_KERNEL - FTR_SECTION_ELSE - mtspr SPRN_SRR0,r10 - RFI_TO_KERNEL - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr - mtspr SPRN_HSRR0,r10 - HRFI_TO_KERNEL - .else - mtspr SPRN_SRR0,r10 - RFI_TO_KERNEL - .endif - b . /* prevent speculative execution */ -.endm - -/* INT_SAVE_SRR_AND_JUMP works for real or virt, this is faster but virt only */ -.macro INT_VIRT_SAVE_SRR_AND_JUMP label, hsrr -#ifdef CONFIG_RELOCATABLE - .if \hsrr == EXC_HV_OR_STD - BEGIN_FTR_SECTION - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - FTR_SECTION_ELSE - mfspr r11,SPRN_SRR0 /* save SRR0 */ - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - .else - mfspr r11,SPRN_SRR0 /* save SRR0 */ - .endif - LOAD_HANDLER(r12, \label\()) - mtctr r12 - .if \hsrr == EXC_HV_OR_STD - BEGIN_FTR_SECTION - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - FTR_SECTION_ELSE - mfspr r12,SPRN_SRR1 /* and HSRR1 */ - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - .else - mfspr r12,SPRN_SRR1 /* and HSRR1 */ - .endif - li r10,MSR_RI - mtmsrd r10,1 /* Set RI (EE=0) */ - bctr -#else - .if \hsrr == EXC_HV_OR_STD - BEGIN_FTR_SECTION - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - FTR_SECTION_ELSE - mfspr r11,SPRN_SRR0 /* save SRR0 */ - mfspr r12,SPRN_SRR1 /* and SRR1 */ - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - .else - mfspr r11,SPRN_SRR0 /* save SRR0 */ - mfspr r12,SPRN_SRR1 /* and SRR1 */ - .endif - li r10,MSR_RI - mtmsrd r10,1 /* Set RI (EE=0) */ - b \label -#endif -.endm - /* * This is the BOOK3S interrupt entry code macro. * @@ -405,14 +345,41 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) * - Fall through and continue executing in real, unrelocated mode. * This is done if early=2. */ -.macro INT_HANDLER name, vec, ool=0, early=0, virt=0, hsrr=0, area=PACA_EXGEN, ri=1, dar=0, dsisr=0, bitmask=0, kvm=0 + +.macro GEN_BRANCH_TO_COMMON name, virt + .if IREALMODE_COMMON + LOAD_HANDLER(r10, \name\()_common) + mtctr r10 + bctr + .else + .if \virt +#ifndef CONFIG_RELOCATABLE + b \name\()_common_virt +#else + LOAD_HANDLER(r10, \name\()_common_virt) + mtctr r10 + bctr +#endif + .else + LOAD_HANDLER(r10, \name\()_common_real) + mtctr r10 + bctr + .endif + .endif +.endm + +.macro GEN_INT_ENTRY name, virt, ool=0 SET_SCRATCH0(r13) /* save r13 */ GET_PACA(r13) - std r9,\area\()+EX_R9(r13) /* save r9 */ - OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR) + std r9,IAREA+EX_R9(r13) /* save r9 */ +BEGIN_FTR_SECTION + mfspr r9,SPRN_PPR +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) HMT_MEDIUM - std r10,\area\()+EX_R10(r13) /* save r10 - r12 */ - OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) + std r10,IAREA+EX_R10(r13) /* save r10 - r12 */ +BEGIN_FTR_SECTION + mfspr r10,SPRN_CFAR +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .if \ool .if !\virt b tramp_real_\name @@ -425,47 +392,18 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) .endif .endif - OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR) - OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR) +BEGIN_FTR_SECTION + std r9,IAREA+EX_PPR(r13) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) +BEGIN_FTR_SECTION + std r10,IAREA+EX_CFAR(r13) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) INTERRUPT_TO_KERNEL - SAVE_CTR(r10, \area\()) + mfctr r10 + std r10,IAREA+EX_CTR(r13) mfcr r9 - .if \kvm - KVMTEST \name \hsrr \vec - .endif - .if \bitmask - lbz r10,PACAIRQSOFTMASK(r13) - andi. r10,r10,\bitmask - /* Associate vector numbers with bits in paca->irq_happened */ - .if \vec == 0x500 || \vec == 0xea0 - li r10,PACA_IRQ_EE - .elseif \vec == 0x900 - li r10,PACA_IRQ_DEC - .elseif \vec == 0xa00 || \vec == 0xe80 - li r10,PACA_IRQ_DBELL - .elseif \vec == 0xe60 - li r10,PACA_IRQ_HMI - .elseif \vec == 0xf00 - li r10,PACA_IRQ_PMI - .else - .abort "Bad maskable vector" - .endif - - .if \hsrr == EXC_HV_OR_STD - BEGIN_FTR_SECTION - bne masked_Hinterrupt - FTR_SECTION_ELSE - bne masked_interrupt - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr - bne masked_Hinterrupt - .else - bne masked_interrupt - .endif - .endif - - std r11,\area\()+EX_R11(r13) - std r12,\area\()+EX_R12(r13) + std r11,IAREA+EX_R11(r13) + std r12,IAREA+EX_R12(r13) /* * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI], @@ -473,49 +411,134 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) * not recoverable if they are live. */ GET_SCRATCH0(r10) - std r10,\area\()+EX_R13(r13) - .if \dar - .if \hsrr + std r10,IAREA+EX_R13(r13) + .if IDAR && !IISIDE + .if IHSRR mfspr r10,SPRN_HDAR .else mfspr r10,SPRN_DAR .endif - std r10,\area\()+EX_DAR(r13) + std r10,IAREA+EX_DAR(r13) .endif - .if \dsisr - .if \hsrr + .if IDSISR && !IISIDE + .if IHSRR mfspr r10,SPRN_HDSISR .else mfspr r10,SPRN_DSISR .endif - stw r10,\area\()+EX_DSISR(r13) + stw r10,IAREA+EX_DSISR(r13) .endif - .if \early == 2 - /* nothing more */ - .elseif \early - mfctr r10 /* save ctr, even for !RELOCATABLE */ - BRANCH_TO_C000(r11, \name\()_early_common) - .elseif !\virt - INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri + .if IHSRR_IF_HVMODE + BEGIN_FTR_SECTION + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ + FTR_SECTION_ELSE + mfspr r11,SPRN_SRR0 /* save SRR0 */ + mfspr r12,SPRN_SRR1 /* and SRR1 */ + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) + .elseif IHSRR + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ .else - INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr + mfspr r11,SPRN_SRR0 /* save SRR0 */ + mfspr r12,SPRN_SRR1 /* and SRR1 */ .endif + + .if IBRANCH_TO_COMMON + GEN_BRANCH_TO_COMMON \name \virt + .endif + .if \ool .popsection .endif .endm /* - * On entry r13 points to the paca, r9-r13 are saved in the paca, - * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and - * SRR1, and relocation is on. + * __GEN_COMMON_ENTRY is required to receive the branch from interrupt + * entry, except in the case of the real-mode handlers which require + * __GEN_REALMODE_COMMON_ENTRY. * - * If stack=0, then the stack is already set in r1, and r1 is saved in r10. - * PPR save and CPU accounting is not done for the !stack case (XXX why not?) + * This switches to virtual mode and sets MSR[RI]. */ -.macro INT_COMMON vec, area, stack, kuap, reconcile, dar, dsisr - .if \stack +.macro __GEN_COMMON_ENTRY name +DEFINE_FIXED_SYMBOL(\name\()_common_real) +\name\()_common_real: + .if IKVM_REAL + KVMTEST \name + .endif + + ld r10,PACAKMSR(r13) /* get MSR value for kernel */ + /* MSR[RI] is clear iff using SRR regs */ + .if IHSRR == EXC_HV_OR_STD + BEGIN_FTR_SECTION + xori r10,r10,MSR_RI + END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) + .elseif ! IHSRR + xori r10,r10,MSR_RI + .endif + mtmsrd r10 + + .if IVIRT + .if IKVM_VIRT + b 1f /* skip the virt test coming from real */ + .endif + + .balign IFETCH_ALIGN_BYTES +DEFINE_FIXED_SYMBOL(\name\()_common_virt) +\name\()_common_virt: + .if IKVM_VIRT + KVMTEST \name +1: + .endif + .endif /* IVIRT */ +.endm + +/* + * Don't switch to virt mode. Used for early MCE and HMI handlers that + * want to run in real mode. + */ +.macro __GEN_REALMODE_COMMON_ENTRY name +DEFINE_FIXED_SYMBOL(\name\()_common_real) +\name\()_common_real: + .if IKVM_REAL + KVMTEST \name + .endif +.endm + +.macro __GEN_COMMON_BODY name + .if IMASK + lbz r10,PACAIRQSOFTMASK(r13) + andi. r10,r10,IMASK + /* Associate vector numbers with bits in paca->irq_happened */ + .if IVEC == 0x500 || IVEC == 0xea0 + li r10,PACA_IRQ_EE + .elseif IVEC == 0x900 + li r10,PACA_IRQ_DEC + .elseif IVEC == 0xa00 || IVEC == 0xe80 + li r10,PACA_IRQ_DBELL + .elseif IVEC == 0xe60 + li r10,PACA_IRQ_HMI + .elseif IVEC == 0xf00 + li r10,PACA_IRQ_PMI + .else + .abort "Bad maskable vector" + .endif + + .if IHSRR_IF_HVMODE + BEGIN_FTR_SECTION + bne masked_Hinterrupt + FTR_SECTION_ELSE + bne masked_interrupt + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) + .elseif IHSRR + bne masked_Hinterrupt + .else + bne masked_interrupt + .endif + .endif + + .if ISTACK andi. r10,r12,MSR_PR /* See if coming from user */ mr r10,r1 /* Save r1 */ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */ @@ -532,54 +555,67 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) std r0,GPR0(r1) /* save r0 in stackframe */ std r10,GPR1(r1) /* save r1 in stackframe */ - .if \stack - .if \kuap + .if ISET_RI + li r10,MSR_RI + mtmsrd r10,1 /* Set MSR_RI */ + .endif + + .if ISTACK + .if IKUAP kuap_save_amr_and_lock r9, r10, cr1, cr0 .endif beq 101f /* if from kernel mode */ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10) - SAVE_PPR(\area, r9) +BEGIN_FTR_SECTION + ld r9,IAREA+EX_PPR(r13) /* Read PPR from paca */ + std r9,_PPR(r1) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 101: .else - .if \kuap + .if IKUAP kuap_save_amr_and_lock r9, r10, cr1 .endif .endif /* Save original regs values from save area to stack frame. */ - ld r9,\area+EX_R9(r13) /* move r9, r10 to stackframe */ - ld r10,\area+EX_R10(r13) + ld r9,IAREA+EX_R9(r13) /* move r9, r10 to stackframe */ + ld r10,IAREA+EX_R10(r13) std r9,GPR9(r1) std r10,GPR10(r1) - ld r9,\area+EX_R11(r13) /* move r11 - r13 to stackframe */ - ld r10,\area+EX_R12(r13) - ld r11,\area+EX_R13(r13) + ld r9,IAREA+EX_R11(r13) /* move r11 - r13 to stackframe */ + ld r10,IAREA+EX_R12(r13) + ld r11,IAREA+EX_R13(r13) std r9,GPR11(r1) std r10,GPR12(r1) std r11,GPR13(r1) - .if \dar - .if \dar == 2 + + SAVE_NVGPRS(r1) + + .if IDAR + .if IISIDE ld r10,_NIP(r1) .else - ld r10,\area+EX_DAR(r13) + ld r10,IAREA+EX_DAR(r13) .endif std r10,_DAR(r1) .endif - .if \dsisr - .if \dsisr == 2 + + .if IDSISR + .if IISIDE ld r10,_MSR(r1) lis r11,DSISR_SRR1_MATCH_64S@h and r10,r10,r11 .else - lwz r10,\area+EX_DSISR(r13) + lwz r10,IAREA+EX_DSISR(r13) .endif std r10,_DSISR(r1) .endif -BEGIN_FTR_SECTION_NESTED(66) - ld r10,\area+EX_CFAR(r13) + +BEGIN_FTR_SECTION + ld r10,IAREA+EX_CFAR(r13) std r10,ORIG_GPR3(r1) -END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66) - GET_CTR(r10, \area) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + ld r10,IAREA+EX_CTR(r13) std r10,_CTR(r1) std r2,GPR2(r1) /* save r2 in stackframe */ SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */ @@ -591,32 +627,42 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66) mfspr r11,SPRN_XER /* save XER in stackframe */ std r10,SOFTE(r1) std r11,_XER(r1) - li r9,(\vec)+1 + li r9,IVEC std r9,_TRAP(r1) /* set trap number */ li r10,0 ld r11,exception_marker@toc(r2) std r10,RESULT(r1) /* clear regs->result */ std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */ - .if \stack + .if ISTACK ACCOUNT_STOLEN_TIME .endif - .if \reconcile + .if IRECONCILE RECONCILE_IRQ_STATE(r10, r11) .endif .endm +/* + * On entry r13 points to the paca, r9-r13 are saved in the paca, + * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and + * SRR1, and relocation is on. + * + * If stack=0, then the stack is already set in r1, and r1 is saved in r10. + * PPR save and CPU accounting is not done for the !stack case (XXX why not?) + */ +.macro GEN_COMMON name + __GEN_COMMON_ENTRY \name + __GEN_COMMON_BODY \name +.endm + /* * Restore all registers including H/SRR0/1 saved in a stack frame of a * standard exception. */ -.macro EXCEPTION_RESTORE_REGS hsrr +.macro EXCEPTION_RESTORE_REGS hsrr=0 /* Move original SRR0 and SRR1 into the respective regs */ ld r9,_MSR(r1) - .if \hsrr == EXC_HV_OR_STD - .error "EXC_HV_OR_STD Not implemented for EXCEPTION_RESTORE_REGS" - .endif .if \hsrr mtspr SPRN_HSRR1,r9 .else @@ -670,28 +716,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #define FINISH_NAP #endif -#define EXC_COMMON(name, realvec, hdlr) \ - EXC_COMMON_BEGIN(name); \ - INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \ - bl save_nvgprs; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b ret_from_except - -/* - * Like EXC_COMMON, but for exceptions that can occur in the idle task and - * therefore need the special idle handling (finish nap and runlatch) - */ -#define EXC_COMMON_ASYNC(name, realvec, hdlr) \ - EXC_COMMON_BEGIN(name); \ - INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \ - FINISH_NAP; \ - RUNLATCH_ON; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b ret_from_except_lite - - /* * There are a few constraints to be concerned with. * - Real mode exceptions code/data must be located at their physical location. @@ -778,6 +802,53 @@ __start_interrupts: EXC_VIRT_NONE(0x4000, 0x100) +/** + * Interrupt 0x100 - System Reset Interrupt (SRESET aka NMI). + * This is a non-maskable, asynchronous interrupt always taken in real-mode. + * It is caused by: + * - Wake from power-saving state, on powernv. + * - An NMI from another CPU, triggered by firmware or hypercall. + * - As crash/debug signal injected from BMC, firmware or hypervisor. + * + * Handling: + * Power-save wakeup is the only performance critical path, so this is + * determined quickly as possible first. In this case volatile registers + * can be discarded and SPRs like CFAR don't need to be read. + * + * If not a powersave wakeup, then it's run as a regular interrupt, however + * it uses its own stack and PACA save area to preserve the regular kernel + * environment for debugging. + * + * This interrupt is not maskable, so triggering it when MSR[RI] is clear, + * or SCRATCH0 is in use, etc. may cause a crash. It's also not entirely + * correct to switch to virtual mode to run the regular interrupt handler + * because it might be interrupted when the MMU is in a bad state (e.g., SLB + * is clear). + * + * FWNMI: + * PAPR specifies a "fwnmi" facility which sends the sreset to a different + * entry point with a different register set up. Some hypervisors will + * send the sreset to 0x100 in the guest if it is not fwnmi capable. + * + * KVM: + * Unlike most SRR interrupts, this may be taken by the host while executing + * in a guest, so a KVM test is required. KVM will pull the CPU out of guest + * mode and then raise the sreset. + */ +INT_DEFINE_BEGIN(system_reset) + IVEC=0x100 + IAREA=PACA_EXNMI + IVIRT=0 /* no virt entry point */ + /* + * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is + * being used, so a nested NMI exception would corrupt it. + */ + ISET_RI=0 + ISTACK=0 + IRECONCILE=0 + IKVM_REAL=1 +INT_DEFINE_END(system_reset) + EXC_REAL_BEGIN(system_reset, 0x100, 0x100) #ifdef CONFIG_PPC_P7_NAP /* @@ -815,11 +886,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif - INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0, kvm=1 + GEN_INT_ENTRY system_reset, virt=0 /* - * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is - * being used, so a nested NMI exception would corrupt it. - * * In theory, we should not enable relocation here if it was disabled * in SRR1, because the MMU may not be configured to support it (e.g., * SLB may have been cleared). In practice, there should only be a few @@ -828,7 +896,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) */ EXC_REAL_END(system_reset, 0x100, 0x100) EXC_VIRT_NONE(0x4100, 0x100) -INT_KVM_HANDLER system_reset 0x100, EXC_STD, PACA_EXNMI, 0 #ifdef CONFIG_PPC_P7_NAP TRAMP_REAL_BEGIN(system_reset_idle_wake) @@ -843,12 +910,14 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake) * Vectors for the FWNMI option. Share common code. */ TRAMP_REAL_BEGIN(system_reset_fwnmi) - /* See comment at system_reset exception, don't turn on RI */ - INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0 + /* XXX: fwnmi guest could run a nested/PR guest, so why no test? */ + __IKVM_REAL(system_reset)=0 + GEN_INT_ENTRY system_reset, virt=0 #endif /* CONFIG_PPC_PSERIES */ EXC_COMMON_BEGIN(system_reset_common) + __GEN_COMMON_ENTRY system_reset /* * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able * to recover, but nested NMI will notice in_nmi and not recover @@ -864,21 +933,21 @@ EXC_COMMON_BEGIN(system_reset_common) mr r10,r1 ld r1,PACA_NMI_EMERG_SP(r13) subi r1,r1,INT_FRAME_SIZE - INT_COMMON 0x100, PACA_EXNMI, 0, 1, 0, 0, 0 - bl save_nvgprs + __GEN_COMMON_BODY system_reset /* - * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does + * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does * the right thing. We do not want to reconcile because that goes * through irq tracing which we don't want in NMI. * - * Save PACAIRQHAPPENED because some code will do a hard disable - * (e.g., xmon). So we want to restore this back to where it was - * when we return. DAR is unused in the stack, so save it there. + * Save PACAIRQHAPPENED to _DAR (otherwise unused), and set HARD_DIS + * as we are running with MSR[EE]=0. */ li r10,IRQS_ALL_DISABLED stb r10,PACAIRQSOFTMASK(r13) lbz r10,PACAIRQHAPPENED(r13) std r10,_DAR(r1) + ori r10,r10,PACA_IRQ_HARD_DIS + stb r10,PACAIRQHAPPENED(r13) addi r3,r1,STACK_FRAME_OVERHEAD bl system_reset_exception @@ -902,28 +971,95 @@ EXC_COMMON_BEGIN(system_reset_common) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - EXCEPTION_RESTORE_REGS EXC_STD + EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL + GEN_KVM system_reset -EXC_REAL_BEGIN(machine_check, 0x200, 0x100) - INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1 + +/** + * Interrupt 0x200 - Machine Check Interrupt (MCE). + * This is a non-maskable interrupt always taken in real-mode. It can be + * synchronous or asynchronous, caused by hardware or software, and it may be + * taken in a power-saving state. + * + * Handling: + * Similarly to system reset, this uses its own stack and PACA save area, + * the difference is re-entrancy is allowed on the machine check stack. + * + * machine_check_early is run in real mode, and carefully decodes the + * machine check and tries to handle it (e.g., flush the SLB if there was an + * error detected there), determines if it was recoverable and logs the + * event. + * + * This early code does not "reconcile" irq soft-mask state like SRESET or + * regular interrupts do, so irqs_disabled() among other things may not work + * properly (irq disable/enable already doesn't work because irq tracing can + * not work in real mode). + * + * Then, depending on the execution context when the interrupt is taken, there + * are 3 main actions: + * - Executing in kernel mode. The event is queued with irq_work, which means + * it is handled when it is next safe to do so (i.e., the kernel has enabled + * interrupts), which could be immediately when the interrupt returns. This + * avoids nasty issues like switching to virtual mode when the MMU is in a + * bad state, or when executing OPAL code. (SRESET is exposed to such issues, + * but it has different priorities). Check to see if the CPU was in power + * save, and return via the wake up code if it was. + * + * - Executing in user mode. machine_check_exception is run like a normal + * interrupt handler, which processes the data generated by the early handler. + * + * - Executing in guest mode. The interrupt is run with its KVM test, and + * branches to KVM to deal with. KVM may queue the event for the host + * to report later. + * + * This interrupt is not maskable, so if it triggers when MSR[RI] is clear, + * or SCRATCH0 is in use, it may cause a crash. + * + * KVM: + * See SRESET. + */ +INT_DEFINE_BEGIN(machine_check_early) + IVEC=0x200 + IAREA=PACA_EXMC + IVIRT=0 /* no virt entry point */ + IREALMODE_COMMON=1 /* * MSR_RI is not enabled, because PACA_EXMC is being used, so a * nested machine check corrupts it. machine_check_common enables * MSR_RI. */ + ISET_RI=0 + ISTACK=0 + IDAR=1 + IDSISR=1 + IRECONCILE=0 + IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */ +INT_DEFINE_END(machine_check_early) + +INT_DEFINE_BEGIN(machine_check) + IVEC=0x200 + IAREA=PACA_EXMC + IVIRT=0 /* no virt entry point */ + ISET_RI=0 + IDAR=1 + IDSISR=1 + IKVM_SKIP=1 + IKVM_REAL=1 +INT_DEFINE_END(machine_check) + +EXC_REAL_BEGIN(machine_check, 0x200, 0x100) + GEN_INT_ENTRY machine_check_early, virt=0 EXC_REAL_END(machine_check, 0x200, 0x100) EXC_VIRT_NONE(0x4200, 0x100) #ifdef CONFIG_PPC_PSERIES TRAMP_REAL_BEGIN(machine_check_fwnmi) /* See comment at machine_check exception, don't turn on RI */ - INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1 + GEN_INT_ENTRY machine_check_early, virt=0 #endif -INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1 - #define MACHINE_CHECK_HANDLER_WINDUP \ /* Clear MSR_RI before setting SRR0 and SRR1. */\ li r9,0; \ @@ -932,12 +1068,10 @@ INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1 lhz r12,PACA_IN_MCE(r13); \ subi r12,r12,1; \ sth r12,PACA_IN_MCE(r13); \ - EXCEPTION_RESTORE_REGS EXC_STD + EXCEPTION_RESTORE_REGS EXC_COMMON_BEGIN(machine_check_early_common) - mtctr r10 /* Restore ctr */ - mfspr r11,SPRN_SRR0 - mfspr r12,SPRN_SRR1 + __GEN_REALMODE_COMMON_ENTRY machine_check_early /* * Switch to mc_emergency stack and handle re-entrancy (we limit @@ -974,8 +1108,7 @@ EXC_COMMON_BEGIN(machine_check_early_common) bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - /* We don't touch AMR here, we never go to virtual mode */ - INT_COMMON 0x200, PACA_EXMC, 0, 0, 0, 1, 1 + __GEN_COMMON_BODY machine_check_early BEGIN_FTR_SECTION bl enable_machine_check @@ -983,7 +1116,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) li r10,MSR_RI mtmsrd r10,1 - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early std r3,RESULT(r1) /* Save result */ @@ -1063,23 +1195,25 @@ BEGIN_FTR_SECTION mtspr SPRN_CFAR,r10 END_FTR_SECTION_IFSET(CPU_FTR_CFAR) MACHINE_CHECK_HANDLER_WINDUP - /* See comment at machine_check exception, don't turn on RI */ - INT_HANDLER machine_check, 0x200, area=PACA_EXMC, ri=0, dar=1, dsisr=1, kvm=1 + GEN_INT_ENTRY machine_check, virt=0 EXC_COMMON_BEGIN(machine_check_common) /* * Machine check is different because we use a different * save area: PACA_EXMC instead of PACA_EXGEN. */ - INT_COMMON 0x200, PACA_EXMC, 1, 1, 1, 1, 1 + GEN_COMMON machine_check + FINISH_NAP /* Enable MSR_RI when finished with PACA_EXMC */ li r10,MSR_RI mtmsrd r10,1 - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception - b ret_from_except + b interrupt_return + + GEN_KVM machine_check + #ifdef CONFIG_PPC_P7_NAP /* @@ -1144,21 +1278,48 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) b . +/** + * Interrupt 0x300 - Data Storage Interrupt (DSI). + * This is a synchronous interrupt generated due to a data access exception, + * e.g., a load orstore which does not have a valid page table entry with + * permissions. DAWR matches also fault here, as do RC updates, and minor misc + * errors e.g., copy/paste, AMO, certain invalid CI accesses, etc. + * + * Handling: + * - Hash MMU + * Go to do_hash_page first to see if the HPT can be filled from an entry in + * the Linux page table. Hash faults can hit in kernel mode in a fairly + * arbitrary state (e.g., interrupts disabled, locks held) when accessing + * "non-bolted" regions, e.g., vmalloc space. However these should always be + * backed by Linux page tables. + * + * If none is found, do a Linux page fault. Linux page faults can happen in + * kernel mode due to user copy operations of course. + * + * - Radix MMU + * The hardware loads from the Linux page table directly, so a fault goes + * immediately to Linux page fault. + * + * Conditions like DAWR match are handled on the way in to Linux page fault. + */ +INT_DEFINE_BEGIN(data_access) + IVEC=0x300 + IDAR=1 + IDSISR=1 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_SKIP=1 + IKVM_REAL=1 +#endif +INT_DEFINE_END(data_access) + EXC_REAL_BEGIN(data_access, 0x300, 0x80) - INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1 + GEN_INT_ENTRY data_access, virt=0 EXC_REAL_END(data_access, 0x300, 0x80) EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) - INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1 + GEN_INT_ENTRY data_access, virt=1 EXC_VIRT_END(data_access, 0x4300, 0x80) -INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1 EXC_COMMON_BEGIN(data_access_common) - /* - * Here r13 points to the paca, r9 contains the saved CR, - * SRR0 and SRR1 are saved in r11 and r12, - * r9 - r13 are saved in paca->exgen. - * EX_DAR and EX_DSISR have saved DAR/DSISR - */ - INT_COMMON 0x300, PACA_EXGEN, 1, 1, 1, 1, 1 + GEN_COMMON data_access ld r4,_DAR(r1) ld r5,_DSISR(r1) BEGIN_MMU_FTR_SECTION @@ -1169,16 +1330,46 @@ MMU_FTR_SECTION_ELSE b handle_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + GEN_KVM data_access + + +/** + * Interrupt 0x380 - Data Segment Interrupt (DSLB). + * This is a synchronous interrupt in response to an MMU fault missing SLB + * entry for HPT, or an address outside RPT translation range. + * + * Handling: + * - HPT: + * This refills the SLB, or reports an access fault similarly to a bad page + * fault. When coming from user-mode, the SLB handler may access any kernel + * data, though it may itself take a DSLB. When coming from kernel mode, + * recursive faults must be avoided so access is restricted to the kernel + * image text/data, kernel stack, and any data allocated below + * ppc64_bolted_size (first segment). The kernel handler must avoid stomping + * on user-handler data structures. + * + * A dedicated save area EXSLB is used (XXX: but it actually need not be + * these days, we could use EXGEN). + */ +INT_DEFINE_BEGIN(data_access_slb) + IVEC=0x380 + IAREA=PACA_EXSLB + IRECONCILE=0 + IDAR=1 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_SKIP=1 + IKVM_REAL=1 +#endif +INT_DEFINE_END(data_access_slb) EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) - INT_HANDLER data_access_slb, 0x380, ool=1, area=PACA_EXSLB, dar=1, kvm=1 + GEN_INT_ENTRY data_access_slb, virt=0 EXC_REAL_END(data_access_slb, 0x380, 0x80) EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) - INT_HANDLER data_access_slb, 0x380, virt=1, area=PACA_EXSLB, dar=1 + GEN_INT_ENTRY data_access_slb, virt=1 EXC_VIRT_END(data_access_slb, 0x4380, 0x80) -INT_KVM_HANDLER data_access_slb, 0x380, EXC_STD, PACA_EXSLB, 1 EXC_COMMON_BEGIN(data_access_slb_common) - INT_COMMON 0x380, PACA_EXSLB, 1, 1, 0, 1, 0 + GEN_COMMON data_access_slb ld r4,_DAR(r1) addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION @@ -1186,31 +1377,50 @@ BEGIN_MMU_FTR_SECTION bl do_slb_fault cmpdi r3,0 bne- 1f - b fast_exception_return + b fast_interrupt_return 1: /* Error case */ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) ld r4,_DAR(r1) ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault - b ret_from_except + b interrupt_return + + GEN_KVM data_access_slb + +/** + * Interrupt 0x400 - Instruction Storage Interrupt (ISI). + * This is a synchronous interrupt in response to an MMU fault due to an + * instruction fetch. + * + * Handling: + * Similar to DSI, though in response to fetch. The faulting address is found + * in SRR0 (rather than DAR), and status in SRR1 (rather than DSISR). + */ +INT_DEFINE_BEGIN(instruction_access) + IVEC=0x400 + IISIDE=1 + IDAR=1 + IDSISR=1 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_REAL=1 +#endif +INT_DEFINE_END(instruction_access) EXC_REAL_BEGIN(instruction_access, 0x400, 0x80) - INT_HANDLER instruction_access, 0x400, kvm=1 + GEN_INT_ENTRY instruction_access, virt=0 EXC_REAL_END(instruction_access, 0x400, 0x80) EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) - INT_HANDLER instruction_access, 0x400, virt=1 + GEN_INT_ENTRY instruction_access, virt=1 EXC_VIRT_END(instruction_access, 0x4400, 0x80) -INT_KVM_HANDLER instruction_access, 0x400, EXC_STD, PACA_EXGEN, 0 EXC_COMMON_BEGIN(instruction_access_common) - INT_COMMON 0x400, PACA_EXGEN, 1, 1, 1, 2, 2 + GEN_COMMON instruction_access ld r4,_DAR(r1) ld r5,_DSISR(r1) BEGIN_MMU_FTR_SECTION @@ -1221,16 +1431,37 @@ MMU_FTR_SECTION_ELSE b handle_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + GEN_KVM instruction_access + + +/** + * Interrupt 0x480 - Instruction Segment Interrupt (ISLB). + * This is a synchronous interrupt in response to an MMU fault due to an + * instruction fetch. + * + * Handling: + * Similar to DSLB, though in response to fetch. The faulting address is found + * in SRR0 (rather than DAR). + */ +INT_DEFINE_BEGIN(instruction_access_slb) + IVEC=0x480 + IAREA=PACA_EXSLB + IRECONCILE=0 + IISIDE=1 + IDAR=1 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_REAL=1 +#endif +INT_DEFINE_END(instruction_access_slb) EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) - INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1 + GEN_INT_ENTRY instruction_access_slb, virt=0 EXC_REAL_END(instruction_access_slb, 0x480, 0x80) EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) - INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB + GEN_INT_ENTRY instruction_access_slb, virt=1 EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) -INT_KVM_HANDLER instruction_access_slb, 0x480, EXC_STD, PACA_EXSLB, 0 EXC_COMMON_BEGIN(instruction_access_slb_common) - INT_COMMON 0x480, PACA_EXSLB, 1, 1, 0, 2, 0 + GEN_COMMON instruction_access_slb ld r4,_DAR(r1) addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION @@ -1238,54 +1469,125 @@ BEGIN_MMU_FTR_SECTION bl do_slb_fault cmpdi r3,0 bne- 1f - b fast_exception_return + b fast_interrupt_return 1: /* Error case */ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) ld r4,_DAR(r1) ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault - b ret_from_except + b interrupt_return + + GEN_KVM instruction_access_slb + + +/** + * Interrupt 0x500 - External Interrupt. + * This is an asynchronous maskable interrupt in response to an "external + * exception" from the interrupt controller or hypervisor (e.g., device + * interrupt). It is maskable in hardware by clearing MSR[EE], and + * soft-maskable with IRQS_DISABLED mask (i.e., local_irq_disable()). + * + * When running in HV mode, Linux sets up the LPCR[LPES] bit such that + * interrupts are delivered with HSRR registers, guests use SRRs, which + * reqiures IHSRR_IF_HVMODE. + * + * On bare metal POWER9 and later, Linux sets the LPCR[HVICE] bit such that + * external interrupts are delivered as Hypervisor Virtualization Interrupts + * rather than External Interrupts. + * + * Handling: + * This calls into Linux IRQ handler. NVGPRs are not saved to reduce overhead, + * because registers at the time of the interrupt are not so important as it is + * asynchronous. + * + * If soft masked, the masked handler will note the pending interrupt for + * replay, and clear MSR[EE] in the interrupted context. + */ +INT_DEFINE_BEGIN(hardware_interrupt) + IVEC=0x500 + IHSRR_IF_HVMODE=1 + IMASK=IRQS_DISABLED + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(hardware_interrupt) EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) - INT_HANDLER hardware_interrupt, 0x500, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY hardware_interrupt, virt=0 EXC_REAL_END(hardware_interrupt, 0x500, 0x100) EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) - INT_HANDLER hardware_interrupt, 0x500, virt=1, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY hardware_interrupt, virt=1 EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) -INT_KVM_HANDLER hardware_interrupt, 0x500, EXC_HV_OR_STD, PACA_EXGEN, 0 -EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ) +EXC_COMMON_BEGIN(hardware_interrupt_common) + GEN_COMMON hardware_interrupt + FINISH_NAP + RUNLATCH_ON + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_IRQ + b interrupt_return + + GEN_KVM hardware_interrupt +/** + * Interrupt 0x600 - Alignment Interrupt + * This is a synchronous interrupt in response to data alignment fault. + */ +INT_DEFINE_BEGIN(alignment) + IVEC=0x600 + IDAR=1 + IDSISR=1 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_REAL=1 +#endif +INT_DEFINE_END(alignment) + EXC_REAL_BEGIN(alignment, 0x600, 0x100) - INT_HANDLER alignment, 0x600, dar=1, dsisr=1, kvm=1 + GEN_INT_ENTRY alignment, virt=0 EXC_REAL_END(alignment, 0x600, 0x100) EXC_VIRT_BEGIN(alignment, 0x4600, 0x100) - INT_HANDLER alignment, 0x600, virt=1, dar=1, dsisr=1 + GEN_INT_ENTRY alignment, virt=1 EXC_VIRT_END(alignment, 0x4600, 0x100) -INT_KVM_HANDLER alignment, 0x600, EXC_STD, PACA_EXGEN, 0 EXC_COMMON_BEGIN(alignment_common) - INT_COMMON 0x600, PACA_EXGEN, 1, 1, 1, 1, 1 - bl save_nvgprs + GEN_COMMON alignment addi r3,r1,STACK_FRAME_OVERHEAD bl alignment_exception - b ret_from_except + REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + b interrupt_return + + GEN_KVM alignment +/** + * Interrupt 0x700 - Program Interrupt (program check). + * This is a synchronous interrupt in response to various instruction faults: + * traps, privilege errors, TM errors, floating point exceptions. + * + * Handling: + * This interrupt may use the "emergency stack" in some cases when being taken + * from kernel context, which complicates handling. + */ +INT_DEFINE_BEGIN(program_check) + IVEC=0x700 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_REAL=1 +#endif +INT_DEFINE_END(program_check) + EXC_REAL_BEGIN(program_check, 0x700, 0x100) - INT_HANDLER program_check, 0x700, kvm=1 + GEN_INT_ENTRY program_check, virt=0 EXC_REAL_END(program_check, 0x700, 0x100) EXC_VIRT_BEGIN(program_check, 0x4700, 0x100) - INT_HANDLER program_check, 0x700, virt=1 + GEN_INT_ENTRY program_check, virt=1 EXC_VIRT_END(program_check, 0x4700, 0x100) -INT_KVM_HANDLER program_check, 0x700, EXC_STD, PACA_EXGEN, 0 EXC_COMMON_BEGIN(program_check_common) + __GEN_COMMON_ENTRY program_check + /* * It's possible to receive a TM Bad Thing type program check with * userspace register values (in particular r1), but with SRR1 reporting @@ -1310,28 +1612,47 @@ EXC_COMMON_BEGIN(program_check_common) mr r10,r1 /* Save r1 */ ld r1,PACAEMERGSP(r13) /* Use emergency stack */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - INT_COMMON 0x700, PACA_EXGEN, 0, 1, 1, 0, 0 + __ISTACK(program_check)=0 + __GEN_COMMON_BODY program_check b 3f 2: - INT_COMMON 0x700, PACA_EXGEN, 1, 1, 1, 0, 0 + __ISTACK(program_check)=1 + __GEN_COMMON_BODY program_check 3: - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception - b ret_from_except + REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + b interrupt_return + + GEN_KVM program_check +/* + * Interrupt 0x800 - Floating-Point Unavailable Interrupt. + * This is a synchronous interrupt in response to executing an fp instruction + * with MSR[FP]=0. + * + * Handling: + * This will load FP registers and enable the FP bit if coming from userspace, + * otherwise report a bad kernel use of FP. + */ +INT_DEFINE_BEGIN(fp_unavailable) + IVEC=0x800 + IRECONCILE=0 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_REAL=1 +#endif +INT_DEFINE_END(fp_unavailable) + EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100) - INT_HANDLER fp_unavailable, 0x800, kvm=1 + GEN_INT_ENTRY fp_unavailable, virt=0 EXC_REAL_END(fp_unavailable, 0x800, 0x100) EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100) - INT_HANDLER fp_unavailable, 0x800, virt=1 + GEN_INT_ENTRY fp_unavailable, virt=1 EXC_VIRT_END(fp_unavailable, 0x4800, 0x100) -INT_KVM_HANDLER fp_unavailable, 0x800, EXC_STD, PACA_EXGEN, 0 EXC_COMMON_BEGIN(fp_unavailable_common) - INT_COMMON 0x800, PACA_EXGEN, 1, 1, 0, 0, 0 + GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_fp_unavailable_exception @@ -1348,64 +1669,168 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif bl load_up_fpu - b fast_exception_return + b fast_interrupt_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl fp_unavailable_tm - b ret_from_except + b interrupt_return #endif + GEN_KVM fp_unavailable + + +/** + * Interrupt 0x900 - Decrementer Interrupt. + * This is an asynchronous interrupt in response to a decrementer exception + * (e.g., DEC has wrapped below zero). It is maskable in hardware by clearing + * MSR[EE], and soft-maskable with IRQS_DISABLED mask (i.e., + * local_irq_disable()). + * + * Handling: + * This calls into Linux timer handler. NVGPRs are not saved (see 0x500). + * + * If soft masked, the masked handler will note the pending interrupt for + * replay, and bump the decrementer to a high value, leaving MSR[EE] enabled + * in the interrupted context. + * If PPC_WATCHDOG is configured, the soft masked handler will actually set + * things back up to run soft_nmi_interrupt as a regular interrupt handler + * on the emergency stack. + */ +INT_DEFINE_BEGIN(decrementer) + IVEC=0x900 + IMASK=IRQS_DISABLED +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_REAL=1 +#endif +INT_DEFINE_END(decrementer) EXC_REAL_BEGIN(decrementer, 0x900, 0x80) - INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY decrementer, virt=0 EXC_REAL_END(decrementer, 0x900, 0x80) EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) - INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED + GEN_INT_ENTRY decrementer, virt=1 EXC_VIRT_END(decrementer, 0x4900, 0x80) -INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0 -EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) +EXC_COMMON_BEGIN(decrementer_common) + GEN_COMMON decrementer + FINISH_NAP + RUNLATCH_ON + addi r3,r1,STACK_FRAME_OVERHEAD + bl timer_interrupt + b interrupt_return + GEN_KVM decrementer + + +/** + * Interrupt 0x980 - Hypervisor Decrementer Interrupt. + * This is an asynchronous interrupt, similar to 0x900 but for the HDEC + * register. + * + * Handling: + * Linux does not use this outside KVM where it's used to keep a host timer + * while the guest is given control of DEC. It should normally be caught by + * the KVM test and routed there. + */ +INT_DEFINE_BEGIN(hdecrementer) + IVEC=0x980 + IHSRR=1 + ISTACK=0 + IRECONCILE=0 + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(hdecrementer) EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80) - INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY hdecrementer, virt=0 EXC_REAL_END(hdecrementer, 0x980, 0x80) EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80) - INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY hdecrementer, virt=1 EXC_VIRT_END(hdecrementer, 0x4980, 0x80) -INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0 -EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) +EXC_COMMON_BEGIN(hdecrementer_common) + __GEN_COMMON_ENTRY hdecrementer + /* + * Hypervisor decrementer interrupts not caught by the KVM test + * shouldn't occur but are sometimes left pending on exit from a KVM + * guest. We don't need to do anything to clear them, as they are + * edge-triggered. + * + * Be careful to avoid touching the kernel stack. + */ + ld r10,PACA_EXGEN+EX_CTR(r13) + mtctr r10 + mtcrf 0x80,r9 + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + HRFI_TO_KERNEL + + GEN_KVM hdecrementer +/** + * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt. + * This is an asynchronous interrupt in response to a msgsndp doorbell. + * It is maskable in hardware by clearing MSR[EE], and soft-maskable with + * IRQS_DISABLED mask (i.e., local_irq_disable()). + * + * Handling: + * Guests may use this for IPIs between threads in a core if the + * hypervisor supports it. NVGPRS are not saved (see 0x500). + * + * If soft masked, the masked handler will note the pending interrupt for + * replay, leaving MSR[EE] enabled in the interrupted context because the + * doorbells are edge triggered. + */ +INT_DEFINE_BEGIN(doorbell_super) + IVEC=0xa00 + IMASK=IRQS_DISABLED +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_REAL=1 +#endif +INT_DEFINE_END(doorbell_super) + EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100) - INT_HANDLER doorbell_super, 0xa00, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY doorbell_super, virt=0 EXC_REAL_END(doorbell_super, 0xa00, 0x100) EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100) - INT_HANDLER doorbell_super, 0xa00, virt=1, bitmask=IRQS_DISABLED + GEN_INT_ENTRY doorbell_super, virt=1 EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) -INT_KVM_HANDLER doorbell_super, 0xa00, EXC_STD, PACA_EXGEN, 0 +EXC_COMMON_BEGIN(doorbell_super_common) + GEN_COMMON doorbell_super + FINISH_NAP + RUNLATCH_ON + addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL -EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception) + bl doorbell_exception #else -EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception) + bl unknown_exception #endif + b interrupt_return + + GEN_KVM doorbell_super EXC_REAL_NONE(0xb00, 0x100) EXC_VIRT_NONE(0x4b00, 0x100) -/* - * system call / hypercall (0xc00, 0x4c00) - * - * The system call exception is invoked with "sc 0" and does not alter HV bit. - * - * The hypercall is invoked with "sc 1" and sets HV=1. +/** + * Interrupt 0xc00 - System Call Interrupt (syscall, hcall). + * This is a synchronous interrupt invoked with the "sc" instruction. The + * system call is invoked with "sc 0" and does not alter the HV bit, so it + * is directed to the currently running OS. The hypercall is invoked with + * "sc 1" and it sets HV=1, so it elevates to hypervisor. * * In HPT, sc 1 always goes to 0xc00 real mode. In RADIX, sc 1 can go to * 0x4c00 virtual mode. * + * Handling: + * If the KVM test fires then it was due to a hypercall and is accordingly + * routed to KVM. Otherwise this executes a normal Linux system call. + * * Call convention: * * syscall and hypercalls register conventions are documented in @@ -1417,6 +1842,12 @@ EXC_VIRT_NONE(0x4b00, 0x100) * without saving, though xer is not a good idea to use, as hardware may * interpret some bits so it may be costly to change them. */ +INT_DEFINE_BEGIN(system_call) + IVEC=0xc00 + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(system_call) + .macro SYSTEM_CALL virt #ifdef CONFIG_KVM_BOOK3S_64_HANDLER /* @@ -1431,7 +1862,7 @@ EXC_VIRT_NONE(0x4b00, 0x100) GET_PACA(r13) std r10,PACA_EXGEN+EX_R10(r13) INTERRUPT_TO_KERNEL - KVMTEST system_call EXC_STD 0xc00 /* uses r10, branch to system_call_kvm */ + KVMTEST system_call /* uses r10, branch to system_call_kvm */ mfctr r9 #else mr r9,r13 @@ -1490,6 +1921,7 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100) EXC_VIRT_END(system_call, 0x4c00, 0x100) #ifdef CONFIG_KVM_BOOK3S_64_HANDLER +TRAMP_REAL_BEGIN(system_call_kvm) /* * This is a hcall, so register convention is as above, with these * differences: @@ -1497,43 +1929,95 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100) * ctr = orig r13 * orig r10 saved in PACA */ -TRAMP_KVM_BEGIN(system_call_kvm) /* * Save the PPR (on systems that support it) before changing to * HMT_MEDIUM. That allows the KVM code to save that value into the * guest state (it is the guest's PPR value). */ - OPT_GET_SPR(r10, SPRN_PPR, CPU_FTR_HAS_PPR) +BEGIN_FTR_SECTION + mfspr r10,SPRN_PPR + std r10,HSTATE_PPR(r13) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) HMT_MEDIUM - OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r10, CPU_FTR_HAS_PPR) mfctr r10 SET_SCRATCH0(r10) - std r9,PACA_EXGEN+EX_R9(r13) - mfcr r9 - KVM_HANDLER 0xc00, EXC_STD, PACA_EXGEN, 0 + mfcr r10 + std r12,HSTATE_SCRATCH0(r13) + sldi r12,r10,32 + ori r12,r12,0xc00 +#ifdef CONFIG_RELOCATABLE + /* + * Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives + * outside the head section. + */ + __LOAD_FAR_HANDLER(r10, kvmppc_interrupt) + mtctr r10 + ld r10,PACA_EXGEN+EX_R10(r13) + bctr +#else + ld r10,PACA_EXGEN+EX_R10(r13) + b kvmppc_interrupt +#endif #endif +/** + * Interrupt 0xd00 - Trace Interrupt. + * This is a synchronous interrupt in response to instruction step or + * breakpoint faults. + */ +INT_DEFINE_BEGIN(single_step) + IVEC=0xd00 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_REAL=1 +#endif +INT_DEFINE_END(single_step) + EXC_REAL_BEGIN(single_step, 0xd00, 0x100) - INT_HANDLER single_step, 0xd00, kvm=1 + GEN_INT_ENTRY single_step, virt=0 EXC_REAL_END(single_step, 0xd00, 0x100) EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100) - INT_HANDLER single_step, 0xd00, virt=1 + GEN_INT_ENTRY single_step, virt=1 EXC_VIRT_END(single_step, 0x4d00, 0x100) -INT_KVM_HANDLER single_step, 0xd00, EXC_STD, PACA_EXGEN, 0 -EXC_COMMON(single_step_common, 0xd00, single_step_exception) +EXC_COMMON_BEGIN(single_step_common) + GEN_COMMON single_step + addi r3,r1,STACK_FRAME_OVERHEAD + bl single_step_exception + b interrupt_return + + GEN_KVM single_step + +/** + * Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI). + * This is a synchronous interrupt in response to an MMU fault caused by a + * guest data access. + * + * Handling: + * This should always get routed to KVM. In radix MMU mode, this is caused + * by a guest nested radix access that can't be performed due to the + * partition scope page table. In hash mode, this can be caused by guests + * running with translation disabled (virtual real mode) or with VPM enabled. + * KVM will update the page table structures or disallow the access. + */ +INT_DEFINE_BEGIN(h_data_storage) + IVEC=0xe00 + IHSRR=1 + IDAR=1 + IDSISR=1 + IKVM_SKIP=1 + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(h_data_storage) EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20) - INT_HANDLER h_data_storage, 0xe00, ool=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1 + GEN_INT_ENTRY h_data_storage, virt=0, ool=1 EXC_REAL_END(h_data_storage, 0xe00, 0x20) EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20) - INT_HANDLER h_data_storage, 0xe00, ool=1, virt=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1 + GEN_INT_ENTRY h_data_storage, virt=1, ool=1 EXC_VIRT_END(h_data_storage, 0x4e00, 0x20) -INT_KVM_HANDLER h_data_storage, 0xe00, EXC_HV, PACA_EXGEN, 1 EXC_COMMON_BEGIN(h_data_storage_common) - INT_COMMON 0xe00, PACA_EXGEN, 1, 1, 1, 1, 1 - bl save_nvgprs + GEN_COMMON h_data_storage addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION ld r4,_DAR(r1) @@ -1542,56 +2026,125 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl unknown_exception ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) - b ret_from_except + b interrupt_return + + GEN_KVM h_data_storage + +/** + * Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI). + * This is a synchronous interrupt in response to an MMU fault caused by a + * guest instruction fetch, similar to HDSI. + */ +INT_DEFINE_BEGIN(h_instr_storage) + IVEC=0xe20 + IHSRR=1 + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(h_instr_storage) EXC_REAL_BEGIN(h_instr_storage, 0xe20, 0x20) - INT_HANDLER h_instr_storage, 0xe20, ool=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY h_instr_storage, virt=0, ool=1 EXC_REAL_END(h_instr_storage, 0xe20, 0x20) EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20) - INT_HANDLER h_instr_storage, 0xe20, ool=1, virt=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY h_instr_storage, virt=1, ool=1 EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20) -INT_KVM_HANDLER h_instr_storage, 0xe20, EXC_HV, PACA_EXGEN, 0 -EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception) +EXC_COMMON_BEGIN(h_instr_storage_common) + GEN_COMMON h_instr_storage + addi r3,r1,STACK_FRAME_OVERHEAD + bl unknown_exception + b interrupt_return + + GEN_KVM h_instr_storage + +/** + * Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt. + */ +INT_DEFINE_BEGIN(emulation_assist) + IVEC=0xe40 + IHSRR=1 + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(emulation_assist) EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20) - INT_HANDLER emulation_assist, 0xe40, ool=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY emulation_assist, virt=0, ool=1 EXC_REAL_END(emulation_assist, 0xe40, 0x20) EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20) - INT_HANDLER emulation_assist, 0xe40, ool=1, virt=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY emulation_assist, virt=1, ool=1 EXC_VIRT_END(emulation_assist, 0x4e40, 0x20) -INT_KVM_HANDLER emulation_assist, 0xe40, EXC_HV, PACA_EXGEN, 0 -EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt) +EXC_COMMON_BEGIN(emulation_assist_common) + GEN_COMMON emulation_assist + addi r3,r1,STACK_FRAME_OVERHEAD + bl emulation_assist_interrupt + REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + b interrupt_return + GEN_KVM emulation_assist -/* - * hmi_exception trampoline is a special case. It jumps to hmi_exception_early - * first, and then eventaully from there to the trampoline to get into virtual - * mode. + +/** + * Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI). + * This is an asynchronous interrupt caused by a Hypervisor Maintenance + * Exception. It is always taken in real mode but uses HSRR registers + * unlike SRESET and MCE. + * + * It is maskable in hardware by clearing MSR[EE], and partially soft-maskable + * with IRQS_DISABLED mask (i.e., local_irq_disable()). + * + * Handling: + * This is a special case, this is handled similarly to machine checks, with an + * initial real mode handler that is not soft-masked, which attempts to fix the + * problem. Then a regular handler which is soft-maskable and reports the + * problem. + * + * The emergency stack is used for the early real mode handler. + * + * XXX: unclear why MCE and HMI schemes could not be made common, e.g., + * either use soft-masking for the MCE, or use irq_work for the HMI. + * + * KVM: + * Unlike MCE, this calls into KVM without calling the real mode handler + * first. */ +INT_DEFINE_BEGIN(hmi_exception_early) + IVEC=0xe60 + IHSRR=1 + IREALMODE_COMMON=1 + ISTACK=0 + IRECONCILE=0 + IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */ + IKVM_REAL=1 +INT_DEFINE_END(hmi_exception_early) + +INT_DEFINE_BEGIN(hmi_exception) + IVEC=0xe60 + IHSRR=1 + IMASK=IRQS_DISABLED + IKVM_REAL=1 +INT_DEFINE_END(hmi_exception) + EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20) - INT_HANDLER hmi_exception, 0xe60, ool=1, early=1, hsrr=EXC_HV, ri=0, kvm=1 + GEN_INT_ENTRY hmi_exception_early, virt=0, ool=1 EXC_REAL_END(hmi_exception, 0xe60, 0x20) EXC_VIRT_NONE(0x4e60, 0x20) -INT_KVM_HANDLER hmi_exception, 0xe60, EXC_HV, PACA_EXGEN, 0 + EXC_COMMON_BEGIN(hmi_exception_early_common) - mtctr r10 /* Restore ctr */ - mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ - mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ + __GEN_REALMODE_COMMON_ENTRY hmi_exception_early + mr r10,r1 /* Save r1 */ ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - /* We don't touch AMR here, we never go to virtual mode */ - INT_COMMON 0xe60, PACA_EXGEN, 0, 0, 0, 0, 0 + __GEN_COMMON_BODY hmi_exception_early addi r3,r1,STACK_FRAME_OVERHEAD bl hmi_exception_realmode cmpdi cr0,r3,0 bne 1f - EXCEPTION_RESTORE_REGS EXC_HV + EXCEPTION_RESTORE_REGS hsrr=1 HRFI_TO_USER_OR_KERNEL 1: @@ -1599,41 +2152,84 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) * Go to virtual mode and pull the HMI event information from * firmware. */ - EXCEPTION_RESTORE_REGS EXC_HV - INT_HANDLER hmi_exception, 0xe60, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1 + EXCEPTION_RESTORE_REGS hsrr=1 + GEN_INT_ENTRY hmi_exception, virt=0 + + GEN_KVM hmi_exception_early EXC_COMMON_BEGIN(hmi_exception_common) - INT_COMMON 0xe60, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON hmi_exception FINISH_NAP RUNLATCH_ON - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl handle_hmi_exception - b ret_from_except + b interrupt_return + + GEN_KVM hmi_exception +/** + * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt. + * This is an asynchronous interrupt in response to a msgsnd doorbell. + * Similar to the 0xa00 doorbell but for host rather than guest. + */ +INT_DEFINE_BEGIN(h_doorbell) + IVEC=0xe80 + IHSRR=1 + IMASK=IRQS_DISABLED + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(h_doorbell) + EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20) - INT_HANDLER h_doorbell, 0xe80, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY h_doorbell, virt=0, ool=1 EXC_REAL_END(h_doorbell, 0xe80, 0x20) EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20) - INT_HANDLER h_doorbell, 0xe80, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY h_doorbell, virt=1, ool=1 EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) -INT_KVM_HANDLER h_doorbell, 0xe80, EXC_HV, PACA_EXGEN, 0 +EXC_COMMON_BEGIN(h_doorbell_common) + GEN_COMMON h_doorbell + FINISH_NAP + RUNLATCH_ON + addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL -EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception) + bl doorbell_exception #else -EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception) + bl unknown_exception #endif + b interrupt_return + + GEN_KVM h_doorbell +/** + * Interrupt 0xea0 - Hypervisor Virtualization Interrupt. + * This is an asynchronous interrupt in response to an "external exception". + * Similar to 0x500 but for host only. + */ +INT_DEFINE_BEGIN(h_virt_irq) + IVEC=0xea0 + IHSRR=1 + IMASK=IRQS_DISABLED + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(h_virt_irq) + EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20) - INT_HANDLER h_virt_irq, 0xea0, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY h_virt_irq, virt=0, ool=1 EXC_REAL_END(h_virt_irq, 0xea0, 0x20) EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20) - INT_HANDLER h_virt_irq, 0xea0, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY h_virt_irq, virt=1, ool=1 EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) -INT_KVM_HANDLER h_virt_irq, 0xea0, EXC_HV, PACA_EXGEN, 0 -EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ) +EXC_COMMON_BEGIN(h_virt_irq_common) + GEN_COMMON h_virt_irq + FINISH_NAP + RUNLATCH_ON + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_IRQ + b interrupt_return + + GEN_KVM h_virt_irq EXC_REAL_NONE(0xec0, 0x20) @@ -1642,25 +2238,69 @@ EXC_REAL_NONE(0xee0, 0x20) EXC_VIRT_NONE(0x4ee0, 0x20) +/* + * Interrupt 0xf00 - Performance Monitor Interrupt (PMI, PMU). + * This is an asynchronous interrupt in response to a PMU exception. + * It is maskable in hardware by clearing MSR[EE], and soft-maskable with + * IRQS_PMI_DISABLED mask (NOTE: NOT local_irq_disable()). + * + * Handling: + * This calls into the perf subsystem. + * + * Like the watchdog soft-nmi, it appears an NMI interrupt to Linux, in that it + * runs under local_irq_disable. However it may be soft-masked in + * powerpc-specific code. + * + * If soft masked, the masked handler will note the pending interrupt for + * replay, and clear MSR[EE] in the interrupted context. + */ +INT_DEFINE_BEGIN(performance_monitor) + IVEC=0xf00 + IMASK=IRQS_PMI_DISABLED +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_REAL=1 +#endif +INT_DEFINE_END(performance_monitor) + EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20) - INT_HANDLER performance_monitor, 0xf00, ool=1, bitmask=IRQS_PMI_DISABLED, kvm=1 + GEN_INT_ENTRY performance_monitor, virt=0, ool=1 EXC_REAL_END(performance_monitor, 0xf00, 0x20) EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20) - INT_HANDLER performance_monitor, 0xf00, ool=1, virt=1, bitmask=IRQS_PMI_DISABLED + GEN_INT_ENTRY performance_monitor, virt=1, ool=1 EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) -INT_KVM_HANDLER performance_monitor, 0xf00, EXC_STD, PACA_EXGEN, 0 -EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception) +EXC_COMMON_BEGIN(performance_monitor_common) + GEN_COMMON performance_monitor + FINISH_NAP + RUNLATCH_ON + addi r3,r1,STACK_FRAME_OVERHEAD + bl performance_monitor_exception + b interrupt_return + GEN_KVM performance_monitor + + +/** + * Interrupt 0xf20 - Vector Unavailable Interrupt. + * This is a synchronous interrupt in response to + * executing a vector (or altivec) instruction with MSR[VEC]=0. + * Similar to FP unavailable. + */ +INT_DEFINE_BEGIN(altivec_unavailable) + IVEC=0xf20 + IRECONCILE=0 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_REAL=1 +#endif +INT_DEFINE_END(altivec_unavailable) EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20) - INT_HANDLER altivec_unavailable, 0xf20, ool=1, kvm=1 + GEN_INT_ENTRY altivec_unavailable, virt=0, ool=1 EXC_REAL_END(altivec_unavailable, 0xf20, 0x20) EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20) - INT_HANDLER altivec_unavailable, 0xf20, ool=1, virt=1 + GEN_INT_ENTRY altivec_unavailable, virt=1, ool=1 EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20) -INT_KVM_HANDLER altivec_unavailable, 0xf20, EXC_STD, PACA_EXGEN, 0 EXC_COMMON_BEGIN(altivec_unavailable_common) - INT_COMMON 0xf20, PACA_EXGEN, 1, 1, 0, 0, 0 + GEN_COMMON altivec_unavailable #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION beq 1f @@ -1674,34 +2314,47 @@ BEGIN_FTR_SECTION END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) #endif bl load_up_altivec - b fast_exception_return + b fast_interrupt_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_tm - b ret_from_except + b interrupt_return #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception - b ret_from_except + b interrupt_return + GEN_KVM altivec_unavailable + + +/** + * Interrupt 0xf40 - VSX Unavailable Interrupt. + * This is a synchronous interrupt in response to + * executing a VSX instruction with MSR[VSX]=0. + * Similar to FP unavailable. + */ +INT_DEFINE_BEGIN(vsx_unavailable) + IVEC=0xf40 + IRECONCILE=0 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_REAL=1 +#endif +INT_DEFINE_END(vsx_unavailable) EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20) - INT_HANDLER vsx_unavailable, 0xf40, ool=1, kvm=1 + GEN_INT_ENTRY vsx_unavailable, virt=0, ool=1 EXC_REAL_END(vsx_unavailable, 0xf40, 0x20) EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20) - INT_HANDLER vsx_unavailable, 0xf40, ool=1, virt=1 + GEN_INT_ENTRY vsx_unavailable, virt=1, ool=1 EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20) -INT_KVM_HANDLER vsx_unavailable, 0xf40, EXC_STD, PACA_EXGEN, 0 EXC_COMMON_BEGIN(vsx_unavailable_common) - INT_COMMON 0xf40, PACA_EXGEN, 1, 1, 0, 0, 0 + GEN_COMMON vsx_unavailable #ifdef CONFIG_VSX BEGIN_FTR_SECTION beq 1f @@ -1717,40 +2370,78 @@ BEGIN_FTR_SECTION b load_up_vsx #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_tm - b ret_from_except + b interrupt_return #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_exception - b ret_from_except + b interrupt_return + GEN_KVM vsx_unavailable + + +/** + * Interrupt 0xf60 - Facility Unavailable Interrupt. + * This is a synchronous interrupt in response to + * executing an instruction without access to the facility that can be + * resolved by the OS (e.g., FSCR, MSR). + * Similar to FP unavailable. + */ +INT_DEFINE_BEGIN(facility_unavailable) + IVEC=0xf60 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_REAL=1 +#endif +INT_DEFINE_END(facility_unavailable) EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20) - INT_HANDLER facility_unavailable, 0xf60, ool=1, kvm=1 + GEN_INT_ENTRY facility_unavailable, virt=0, ool=1 EXC_REAL_END(facility_unavailable, 0xf60, 0x20) EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20) - INT_HANDLER facility_unavailable, 0xf60, ool=1, virt=1 + GEN_INT_ENTRY facility_unavailable, virt=1, ool=1 EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20) -INT_KVM_HANDLER facility_unavailable, 0xf60, EXC_STD, PACA_EXGEN, 0 -EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception) +EXC_COMMON_BEGIN(facility_unavailable_common) + GEN_COMMON facility_unavailable + addi r3,r1,STACK_FRAME_OVERHEAD + bl facility_unavailable_exception + b interrupt_return + + GEN_KVM facility_unavailable +/** + * Interrupt 0xf60 - Hypervisor Facility Unavailable Interrupt. + * This is a synchronous interrupt in response to + * executing an instruction without access to the facility that can only + * be resolved in HV mode (e.g., HFSCR). + * Similar to FP unavailable. + */ +INT_DEFINE_BEGIN(h_facility_unavailable) + IVEC=0xf80 + IHSRR=1 + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(h_facility_unavailable) + EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20) - INT_HANDLER h_facility_unavailable, 0xf80, ool=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY h_facility_unavailable, virt=0, ool=1 EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20) EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20) - INT_HANDLER h_facility_unavailable, 0xf80, ool=1, virt=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY h_facility_unavailable, virt=1, ool=1 EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20) -INT_KVM_HANDLER h_facility_unavailable, 0xf80, EXC_HV, PACA_EXGEN, 0 -EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception) +EXC_COMMON_BEGIN(h_facility_unavailable_common) + GEN_COMMON h_facility_unavailable + addi r3,r1,STACK_FRAME_OVERHEAD + bl facility_unavailable_exception + b interrupt_return + + GEN_KVM h_facility_unavailable EXC_REAL_NONE(0xfa0, 0x20) @@ -1766,56 +2457,95 @@ EXC_REAL_NONE(0x1100, 0x100) EXC_VIRT_NONE(0x5100, 0x100) #ifdef CONFIG_CBE_RAS +INT_DEFINE_BEGIN(cbe_system_error) + IVEC=0x1200 + IHSRR=1 + IKVM_SKIP=1 + IKVM_REAL=1 +INT_DEFINE_END(cbe_system_error) + EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100) - INT_HANDLER cbe_system_error, 0x1200, ool=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY cbe_system_error, virt=0 EXC_REAL_END(cbe_system_error, 0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) -INT_KVM_HANDLER cbe_system_error, 0x1200, EXC_HV, PACA_EXGEN, 1 -EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception) +EXC_COMMON_BEGIN(cbe_system_error_common) + GEN_COMMON cbe_system_error + addi r3,r1,STACK_FRAME_OVERHEAD + bl cbe_system_error_exception + b interrupt_return + + GEN_KVM cbe_system_error + #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) #endif +INT_DEFINE_BEGIN(instruction_breakpoint) + IVEC=0x1300 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_SKIP=1 + IKVM_REAL=1 +#endif +INT_DEFINE_END(instruction_breakpoint) + EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100) - INT_HANDLER instruction_breakpoint, 0x1300, kvm=1 + GEN_INT_ENTRY instruction_breakpoint, virt=0 EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100) EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100) - INT_HANDLER instruction_breakpoint, 0x1300, virt=1 + GEN_INT_ENTRY instruction_breakpoint, virt=1 EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100) -INT_KVM_HANDLER instruction_breakpoint, 0x1300, EXC_STD, PACA_EXGEN, 1 -EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception) +EXC_COMMON_BEGIN(instruction_breakpoint_common) + GEN_COMMON instruction_breakpoint + addi r3,r1,STACK_FRAME_OVERHEAD + bl instruction_breakpoint_exception + b interrupt_return + + GEN_KVM instruction_breakpoint EXC_REAL_NONE(0x1400, 0x100) EXC_VIRT_NONE(0x5400, 0x100) -EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) - INT_HANDLER denorm_exception_hv, 0x1500, early=2, hsrr=EXC_HV +/** + * Interrupt 0x1500 - Soft Patch Interrupt + * + * Handling: + * This is an implementation specific interrupt which can be used for a + * range of exceptions. + * + * This interrupt handler is unique in that it runs the denormal assist + * code even for guests (and even in guest context) without going to KVM, + * for speed. POWER9 does not raise denorm exceptions, so this special case + * could be phased out in future to reduce special cases. + */ +INT_DEFINE_BEGIN(denorm_exception) + IVEC=0x1500 + IHSRR=1 + IBRANCH_COMMON=0 + IKVM_REAL=1 +INT_DEFINE_END(denorm_exception) + +EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100) + GEN_INT_ENTRY denorm_exception, virt=0 #ifdef CONFIG_PPC_DENORMALISATION - mfspr r10,SPRN_HSRR1 - andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ + andis. r10,r12,(HSRR1_DENORM)@h /* denorm? */ bne+ denorm_assist #endif - KVMTEST denorm_exception_hv, EXC_HV 0x1500 - INT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV, 1 -EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100) - + GEN_BRANCH_TO_COMMON denorm_exception, virt=0 +EXC_REAL_END(denorm_exception, 0x1500, 0x100) #ifdef CONFIG_PPC_DENORMALISATION EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100) - INT_HANDLER denorm_exception, 0x1500, 0, 2, 1, EXC_HV, PACA_EXGEN, 1, 0, 0, 0, 0 - mfspr r10,SPRN_HSRR1 - andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ + GEN_INT_ENTRY denorm_exception, virt=1 + andis. r10,r12,(HSRR1_DENORM)@h /* denorm? */ bne+ denorm_assist - INT_VIRT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV + GEN_BRANCH_TO_COMMON denorm_exception, virt=1 EXC_VIRT_END(denorm_exception, 0x5500, 0x100) #else EXC_VIRT_NONE(0x5500, 0x100) #endif -INT_KVM_HANDLER denorm_exception_hv, 0x1500, EXC_HV, PACA_EXGEN, 0 - #ifdef CONFIG_PPC_DENORMALISATION TRAMP_REAL_BEGIN(denorm_assist) BEGIN_FTR_SECTION @@ -1872,7 +2602,10 @@ denorm_done: mtspr SPRN_HSRR0,r11 mtcrf 0x80,r9 ld r9,PACA_EXGEN+EX_R9(r13) - RESTORE_PPR_PACA(PACA_EXGEN, r10) +BEGIN_FTR_SECTION + ld r10,PACA_EXGEN+EX_PPR(r13) + mtspr SPRN_PPR,r10 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) BEGIN_FTR_SECTION ld r10,PACA_EXGEN+EX_CFAR(r13) mtspr SPRN_CFAR,r10 @@ -1885,43 +2618,88 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) b . #endif -EXC_COMMON(denorm_common, 0x1500, unknown_exception) +EXC_COMMON_BEGIN(denorm_exception_common) + GEN_COMMON denorm_exception + addi r3,r1,STACK_FRAME_OVERHEAD + bl unknown_exception + b interrupt_return + + GEN_KVM denorm_exception #ifdef CONFIG_CBE_RAS +INT_DEFINE_BEGIN(cbe_maintenance) + IVEC=0x1600 + IHSRR=1 + IKVM_SKIP=1 + IKVM_REAL=1 +INT_DEFINE_END(cbe_maintenance) + EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100) - INT_HANDLER cbe_maintenance, 0x1600, ool=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY cbe_maintenance, virt=0 EXC_REAL_END(cbe_maintenance, 0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) -INT_KVM_HANDLER cbe_maintenance, 0x1600, EXC_HV, PACA_EXGEN, 1 -EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception) +EXC_COMMON_BEGIN(cbe_maintenance_common) + GEN_COMMON cbe_maintenance + addi r3,r1,STACK_FRAME_OVERHEAD + bl cbe_maintenance_exception + b interrupt_return + + GEN_KVM cbe_maintenance + #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) #endif +INT_DEFINE_BEGIN(altivec_assist) + IVEC=0x1700 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + IKVM_REAL=1 +#endif +INT_DEFINE_END(altivec_assist) + EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100) - INT_HANDLER altivec_assist, 0x1700, kvm=1 + GEN_INT_ENTRY altivec_assist, virt=0 EXC_REAL_END(altivec_assist, 0x1700, 0x100) EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100) - INT_HANDLER altivec_assist, 0x1700, virt=1 + GEN_INT_ENTRY altivec_assist, virt=1 EXC_VIRT_END(altivec_assist, 0x5700, 0x100) -INT_KVM_HANDLER altivec_assist, 0x1700, EXC_STD, PACA_EXGEN, 0 +EXC_COMMON_BEGIN(altivec_assist_common) + GEN_COMMON altivec_assist + addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_ALTIVEC -EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception) + bl altivec_assist_exception + REST_NVGPRS(r1) /* instruction emulation may change GPRs */ #else -EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception) + bl unknown_exception #endif + b interrupt_return + + GEN_KVM altivec_assist #ifdef CONFIG_CBE_RAS +INT_DEFINE_BEGIN(cbe_thermal) + IVEC=0x1800 + IHSRR=1 + IKVM_SKIP=1 + IKVM_REAL=1 +INT_DEFINE_END(cbe_thermal) + EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100) - INT_HANDLER cbe_thermal, 0x1800, ool=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY cbe_thermal, virt=0 EXC_REAL_END(cbe_thermal, 0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) -INT_KVM_HANDLER cbe_thermal, 0x1800, EXC_HV, PACA_EXGEN, 1 -EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception) +EXC_COMMON_BEGIN(cbe_thermal_common) + GEN_COMMON cbe_thermal + addi r3,r1,STACK_FRAME_OVERHEAD + bl cbe_thermal_exception + b interrupt_return + + GEN_KVM cbe_thermal + #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) @@ -1930,14 +2708,11 @@ EXC_VIRT_NONE(0x5800, 0x100) #ifdef CONFIG_PPC_WATCHDOG -#define MASKED_DEC_HANDLER_LABEL 3f - -#define MASKED_DEC_HANDLER(_H) \ -3: /* soft-nmi */ \ - std r12,PACA_EXGEN+EX_R12(r13); \ - GET_SCRATCH0(r10); \ - std r10,PACA_EXGEN+EX_R13(r13); \ - INT_SAVE_SRR_AND_JUMP soft_nmi_common, _H, 1 +INT_DEFINE_BEGIN(soft_nmi) + IVEC=0x900 + ISTACK=0 + IRECONCILE=0 /* Soft-NMI may fire under local_irq_disable */ +INT_DEFINE_END(soft_nmi) /* * Branch to soft_nmi_interrupt using the emergency stack. The emergency @@ -1949,18 +2724,42 @@ EXC_VIRT_NONE(0x5800, 0x100) * and run it entirely with interrupts hard disabled. */ EXC_COMMON_BEGIN(soft_nmi_common) + mfspr r11,SPRN_SRR0 mr r10,r1 ld r1,PACAEMERGSP(r13) subi r1,r1,INT_FRAME_SIZE - INT_COMMON 0x900, PACA_EXGEN, 0, 1, 1, 0, 0 - bl save_nvgprs + __GEN_COMMON_BODY soft_nmi + + /* + * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see + * system_reset_common) + */ + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + lbz r10,PACAIRQHAPPENED(r13) + std r10,_DAR(r1) + ori r10,r10,PACA_IRQ_HARD_DIS + stb r10,PACAIRQHAPPENED(r13) + addi r3,r1,STACK_FRAME_OVERHEAD bl soft_nmi_interrupt - b ret_from_except -#else /* CONFIG_PPC_WATCHDOG */ -#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */ -#define MASKED_DEC_HANDLER(_H) + /* Clear MSR_RI before setting SRR0 and SRR1. */ + li r9,0 + mtmsrd r9,1 + + /* + * Restore soft mask settings. + */ + ld r10,_DAR(r1) + stb r10,PACAIRQHAPPENED(r13) + ld r10,SOFTE(r1) + stb r10,PACAIRQSOFTMASK(r13) + + kuap_restore_amr r10 + EXCEPTION_RESTORE_REGS hsrr=0 + RFI_TO_KERNEL + #endif /* CONFIG_PPC_WATCHDOG */ /* @@ -1973,13 +2772,12 @@ EXC_COMMON_BEGIN(soft_nmi_common) * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return. * This is called with r10 containing the value to OR to the paca field. */ -.macro MASKED_INTERRUPT hsrr +.macro MASKED_INTERRUPT hsrr=0 .if \hsrr masked_Hinterrupt: .else masked_interrupt: .endif - std r11,PACA_EXGEN+EX_R11(r13) lbz r11,PACAIRQHAPPENED(r13) or r11,r11,r10 stb r11,PACAIRQHAPPENED(r13) @@ -1988,26 +2786,30 @@ masked_interrupt: lis r10,0x7fff ori r10,r10,0xffff mtspr SPRN_DEC,r10 - b MASKED_DEC_HANDLER_LABEL +#ifdef CONFIG_PPC_WATCHDOG + b soft_nmi_common +#else + b 2f +#endif 1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK beq 2f + xori r12,r12,MSR_EE /* clear MSR_EE */ .if \hsrr - mfspr r10,SPRN_HSRR1 - xori r10,r10,MSR_EE /* clear MSR_EE */ - mtspr SPRN_HSRR1,r10 + mtspr SPRN_HSRR1,r12 .else - mfspr r10,SPRN_SRR1 - xori r10,r10,MSR_EE /* clear MSR_EE */ - mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR1,r12 .endif ori r11,r11,PACA_IRQ_HARD_DIS stb r11,PACAIRQHAPPENED(r13) 2: /* done */ + ld r10,PACA_EXGEN+EX_CTR(r13) + mtctr r10 mtcrf 0x80,r9 std r1,PACAR1(r13) ld r9,PACA_EXGEN+EX_R9(r13) ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) /* returns to kernel where r13 must be set up, so don't restore it */ .if \hsrr HRFI_TO_KERNEL @@ -2015,7 +2817,6 @@ masked_interrupt: RFI_TO_KERNEL .endif b . - MASKED_DEC_HANDLER(\hsrr\()) .endm TRAMP_REAL_BEGIN(stf_barrier_fallback) @@ -2117,17 +2918,12 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) GET_SCRATCH0(r13); hrfid -/* - * Real mode exceptions actually use this too, but alternate - * instruction code patches (which end up in the common .text area) - * cannot reach these if they are put there. - */ -USE_FIXED_SECTION(virt_trampolines) - MASKED_INTERRUPT EXC_STD - MASKED_INTERRUPT EXC_HV +USE_TEXT_SECTION() + MASKED_INTERRUPT + MASKED_INTERRUPT hsrr=1 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER -TRAMP_REAL_BEGIN(kvmppc_skip_interrupt) +kvmppc_skip_interrupt: /* * Here all GPRs are unchanged from when the interrupt happened * except for r13, which is saved in SPRG_SCRATCH0. @@ -2139,7 +2935,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt) RFI_TO_KERNEL b . -TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) +kvmppc_skip_Hinterrupt: /* * Here all GPRs are unchanged from when the interrupt happened * except for r13, which is saved in SPRG_SCRATCH0. @@ -2152,16 +2948,6 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) b . #endif -/* - * Ensure that any handlers that get invoked from the exception prologs - * above are below the first 64KB (0x10000) of the kernel image because - * the prologs assemble the addresses of these handlers using the - * LOAD_HANDLER macro, which uses an ori instruction. - */ - -/*** Common interrupt handlers ***/ - - /* * Relocation-on interrupts: A subset of the interrupts can be delivered * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering @@ -2275,7 +3061,7 @@ do_hash_page: cmpdi r3,0 /* see if __hash_page succeeded */ /* Success */ - beq fast_exc_return_irq /* Return from exception on success */ + beq interrupt_return /* Return from exception on success */ /* Error */ blt- 13f @@ -2292,39 +3078,36 @@ handle_page_fault: addi r3,r1,STACK_FRAME_OVERHEAD bl do_page_fault cmpdi r3,0 - beq+ ret_from_except_lite - bl save_nvgprs + beq+ interrupt_return mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) bl bad_page_fault - b ret_from_except + b interrupt_return /* We have a data breakpoint exception - handle it */ handle_dabr_fault: - bl save_nvgprs ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_break /* * do_break() may have changed the NV GPRS while handling a breakpoint. - * If so, we need to restore them with their updated values. Don't use - * ret_from_except_lite here. + * If so, we need to restore them with their updated values. */ - b ret_from_except + REST_NVGPRS(r1) + b interrupt_return #ifdef CONFIG_PPC_BOOK3S_64 /* We have a page fault that hash_page could handle but HV refused * the PTE insertion */ -13: bl save_nvgprs - mr r5,r3 +13: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) bl low_hash_fault - b ret_from_except + b interrupt_return #endif /* @@ -2334,11 +3117,10 @@ handle_dabr_fault: * were soft-disabled. We want to invoke the exception handler for * the access, or panic if there isn't a handler. */ -77: bl save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD +77: addi r3,r1,STACK_FRAME_OVERHEAD li r5,SIGSEGV bl bad_page_fault - b ret_from_except + b interrupt_return /* * When doorbell is triggered from system reset wakeup, the message is @@ -2352,56 +3134,9 @@ handle_dabr_fault: h_doorbell_common_msgclr: LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36)) PPC_MSGCLR(3) - b h_doorbell_common + b h_doorbell_common_virt doorbell_super_common_msgclr: LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36)) PPC_MSGCLRP(3) - b doorbell_super_common - -/* - * Called from arch_local_irq_enable when an interrupt needs - * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate - * which kind of interrupt. MSR:EE is already off. We generate a - * stackframe like if a real interrupt had happened. - * - * Note: While MSR:EE is off, we need to make sure that _MSR - * in the generated frame has EE set to 1 or the exception - * handler will not properly re-enable them. - * - * Note that we don't specify LR as the NIP (return address) for - * the interrupt because that would unbalance the return branch - * predictor. - */ -_GLOBAL(__replay_interrupt) - /* We are going to jump to the exception common code which - * will retrieve various register values from the PACA which - * we don't give a damn about, so we don't bother storing them. - */ - mfmsr r12 - LOAD_REG_ADDR(r11, replay_interrupt_return) - mfcr r9 - ori r12,r12,MSR_EE - cmpwi r3,0x900 - beq decrementer_common - cmpwi r3,0x500 -BEGIN_FTR_SECTION - beq h_virt_irq_common -FTR_SECTION_ELSE - beq hardware_interrupt_common -ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300) - cmpwi r3,0xf00 - beq performance_monitor_common -BEGIN_FTR_SECTION - cmpwi r3,0xa00 - beq h_doorbell_common_msgclr - cmpwi r3,0xe60 - beq hmi_exception_common -FTR_SECTION_ELSE - cmpwi r3,0xa00 - beq doorbell_super_common_msgclr -ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) -replay_interrupt_return: - blr - -_ASM_NOKPROBE_SYMBOL(__replay_interrupt) + b doorbell_super_common_virt diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index ff0114aeba9b5c..59e60a9a9f5c5e 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -36,6 +36,8 @@ static struct fw_dump fw_dump; static void __init fadump_reserve_crash_area(u64 base); +struct kobject *fadump_kobj; + #ifndef CONFIG_PRESERVE_FA_DUMP static DEFINE_MUTEX(fadump_mutex); struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 }; @@ -1323,9 +1325,9 @@ static void fadump_invalidate_release_mem(void) fw_dump.ops->fadump_init_mem_struct(&fw_dump); } -static ssize_t fadump_release_memory_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) +static ssize_t release_mem_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) { int input = -1; @@ -1350,23 +1352,40 @@ static ssize_t fadump_release_memory_store(struct kobject *kobj, return count; } -static ssize_t fadump_enabled_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) +/* Release the reserved memory and disable the FADump */ +static void unregister_fadump(void) +{ + fadump_cleanup(); + fadump_release_memory(fw_dump.reserve_dump_area_start, + fw_dump.reserve_dump_area_size); + fw_dump.fadump_enabled = 0; + kobject_put(fadump_kobj); +} + +static ssize_t enabled_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", fw_dump.fadump_enabled); } -static ssize_t fadump_register_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) +static ssize_t mem_reserved_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size); +} + +static ssize_t registered_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", fw_dump.dump_registered); } -static ssize_t fadump_register_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) +static ssize_t registered_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) { int ret = 0; int input = -1; @@ -1418,45 +1437,82 @@ static int fadump_region_show(struct seq_file *m, void *private) return 0; } -static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem, - 0200, NULL, - fadump_release_memory_store); -static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled, - 0444, fadump_enabled_show, - NULL); -static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered, - 0644, fadump_register_show, - fadump_register_store); +static struct kobj_attribute release_attr = __ATTR_WO(release_mem); +static struct kobj_attribute enable_attr = __ATTR_RO(enabled); +static struct kobj_attribute register_attr = __ATTR_RW(registered); +static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved); + +static struct attribute *fadump_attrs[] = { + &enable_attr.attr, + ®ister_attr.attr, + &mem_reserved_attr.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(fadump); DEFINE_SHOW_ATTRIBUTE(fadump_region); static void fadump_init_files(void) { - struct dentry *debugfs_file; int rc = 0; - rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr); - if (rc) - printk(KERN_ERR "fadump: unable to create sysfs file" - " fadump_enabled (%d)\n", rc); + fadump_kobj = kobject_create_and_add("fadump", kernel_kobj); + if (!fadump_kobj) { + pr_err("failed to create fadump kobject\n"); + return; + } - rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr); - if (rc) - printk(KERN_ERR "fadump: unable to create sysfs file" - " fadump_registered (%d)\n", rc); + debugfs_create_file("fadump_region", 0444, powerpc_debugfs_root, NULL, + &fadump_region_fops); - debugfs_file = debugfs_create_file("fadump_region", 0444, - powerpc_debugfs_root, NULL, - &fadump_region_fops); - if (!debugfs_file) - printk(KERN_ERR "fadump: unable to create debugfs file" - " fadump_region\n"); + if (fw_dump.dump_active) { + rc = sysfs_create_file(fadump_kobj, &release_attr.attr); + if (rc) + pr_err("unable to create release_mem sysfs file (%d)\n", + rc); + } + + rc = sysfs_create_groups(fadump_kobj, fadump_groups); + if (rc) { + pr_err("sysfs group creation failed (%d), unregistering FADump", + rc); + unregister_fadump(); + return; + } + + /* + * The FADump sysfs are moved from kernel_kobj to fadump_kobj need to + * create symlink at old location to maintain backward compatibility. + * + * - fadump_enabled -> fadump/enabled + * - fadump_registered -> fadump/registered + * - fadump_release_mem -> fadump/release_mem + */ + rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj, + "enabled", "fadump_enabled"); + if (rc) { + pr_err("unable to create fadump_enabled symlink (%d)", rc); + return; + } + + rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj, + "registered", + "fadump_registered"); + if (rc) { + pr_err("unable to create fadump_registered symlink (%d)", rc); + sysfs_remove_link(kernel_kobj, "fadump_enabled"); + return; + } if (fw_dump.dump_active) { - rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr); + rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, + fadump_kobj, + "release_mem", + "fadump_release_mem"); if (rc) - printk(KERN_ERR "fadump: unable to create sysfs file" - " fadump_release_mem (%d)\n", rc); + pr_err("unable to create fadump_release_mem symlink (%d)", + rc); } return; } diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 97c887950c3ca1..daaa153950c288 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -348,7 +348,7 @@ BEGIN_MMU_FTR_SECTION andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h #endif bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ - rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ + rlwinm r3, r5, 32 - 24, 30, 30 /* DSISR_STORE -> _PAGE_RW */ bl hash_page b handle_page_fault_tramp_1 FTR_SECTION_ELSE @@ -497,7 +497,6 @@ InstructionTLBMiss: andc. r1,r1,r0 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ ori r1, r1, 0xe06 /* clear out reserved bits */ andc r1, r0, r1 /* PP = user? 1 : 0 */ BEGIN_FTR_SECTION @@ -565,9 +564,8 @@ DataLoadTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + rlwinm r1,r0,0,30,30 /* _PAGE_RW -> PP msb */ + rlwimi r0,r0,1,30,30 /* _PAGE_USER -> PP msb */ ori r1,r1,0xe04 /* clear out reserved bits */ andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ BEGIN_FTR_SECTION @@ -645,7 +643,6 @@ DataStoreTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ andc r1,r0,r1 /* PP = user? 1: 0 */ BEGIN_FTR_SECTION diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 9db162f79fe6e6..9abec6cd099c69 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -130,37 +130,36 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) .macro SYSCALL_ENTRY trapno mfspr r12,SPRN_SPRG_THREAD + mfspr r9, SPRN_SRR1 #ifdef CONFIG_VMAP_STACK - mfspr r9, SPRN_SRR0 - mfspr r11, SPRN_SRR1 - stw r9, SRR0(r12) - stw r11, SRR1(r12) + mfspr r11, SPRN_SRR0 + mtctr r11 #endif - mfcr r10 + andi. r11, r9, MSR_PR lwz r11,TASK_STACK-THREAD(r12) - rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ + beq- 99f addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE #ifdef CONFIG_VMAP_STACK - li r9, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r9 + li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + mtmsr r10 isync #endif tovirt_vmstack r12, r12 tophys_novmstack r11, r11 - mflr r9 - stw r10,_CCR(r11) /* save registers */ - stw r9, _LINK(r11) + mflr r10 + stw r10, _LINK(r11) #ifdef CONFIG_VMAP_STACK - lwz r10, SRR0(r12) - lwz r9, SRR1(r12) + mfctr r10 #else mfspr r10,SPRN_SRR0 - mfspr r9,SPRN_SRR1 #endif stw r1,GPR1(r11) stw r1,0(r11) tovirt_novmstack r1, r11 /* set new kernel sp */ stw r10,_NIP(r11) + mfcr r10 + rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ + stw r10,_CCR(r11) /* save registers */ #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ #else @@ -228,6 +227,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) mtspr SPRN_SRR0,r11 SYNC RFI /* jump to handler, enable MMU */ +99: b ret_from_kernel_syscall .endm .macro save_dar_dsisr_on_stack reg1, reg2, sp diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ad79fddb974dee..ddfbd02140d9b8 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -537,6 +537,7 @@ __start_initialization_multiplatform: b __after_prom_start #endif /* CONFIG_PPC_BOOK3E */ +__REF __boot_from_prom: #ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE /* Save parameters */ @@ -574,6 +575,7 @@ __boot_from_prom: /* We never return. We also hit that trap if trying to boot * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */ trap + .previous __after_prom_start: #ifdef CONFIG_RELOCATABLE @@ -977,7 +979,6 @@ start_here_multiplatform: RFI b . /* prevent speculative execution */ - .previous /* This is where all platforms converge execution */ start_here_common: @@ -1001,6 +1002,7 @@ start_here_common: /* Not reached */ trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 + .previous /* * We put a few things here that have to be page-aligned. diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 37fc84ed90e3a0..bd2e5ed8dd50fb 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -104,16 +104,18 @@ FTR_SECTION_ELSE #ifdef CONFIG_KVM_BOOKE_HV ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #endif + mfspr r9, SPRN_SRR1 BOOKE_CLEAR_BTB(r11) + andi. r11, r9, MSR_PR lwz r11, TASK_STACK - THREAD(r10) rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ + beq- 99f ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE) stw r12, _CCR(r11) /* save various registers */ mflr r12 stw r12,_LINK(r11) mfspr r12,SPRN_SRR0 stw r1, GPR1(r11) - mfspr r9,SPRN_SRR1 stw r1, 0(r11) mr r1, r11 stw r12,_NIP(r11) @@ -176,6 +178,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mtspr SPRN_SRR0,r11 SYNC RFI /* jump to handler, enable MMU */ +99: b ret_from_kernel_syscall .endm /* To handle the additional exception priority levels on 40x and Book-E diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index d0854320bb50fd..72f461bd70fba2 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -429,3 +429,19 @@ void hw_breakpoint_pmu_read(struct perf_event *bp) { /* TODO */ } + +void ptrace_triggered(struct perf_event *bp, + struct perf_sample_data *data, struct pt_regs *regs) +{ + struct perf_event_attr attr; + + /* + * Disable the breakpoint request here since ptrace has defined a + * one-shot behaviour for breakpoint exceptions in PPC64. + * The SIGTRAP signal is generated automatically for us in do_dabr(). + * We don't have to do anything about that here + */ + attr = bp->attr; + attr.disabled = true; + modify_user_hw_breakpoint(bp, &attr); +} diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5c9b11878555af..a25ed47087ee24 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -70,6 +70,7 @@ #include #include #include +#include #endif #define CREATE_TRACE_POINTS #include @@ -109,6 +110,8 @@ static inline notrace int decrementer_check_overflow(void) return now >= *next_tb; } +#ifdef CONFIG_PPC_BOOK3E + /* This is called whenever we are re-enabling interrupts * and returns either 0 (nothing to do) or 500/900/280/a00/e80 if * there's an EE, DEC or DBELL to generate. @@ -168,6 +171,67 @@ notrace unsigned int __check_irq_replay(void) } } + if (happened & PACA_IRQ_DEC) { + local_paca->irq_happened &= ~PACA_IRQ_DEC; + return 0x900; + } + + if (happened & PACA_IRQ_EE) { + local_paca->irq_happened &= ~PACA_IRQ_EE; + return 0x500; + } + + /* + * Check if an EPR external interrupt happened this bit is typically + * set if we need to handle another "edge" interrupt from within the + * MPIC "EPR" handler. + */ + if (happened & PACA_IRQ_EE_EDGE) { + local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; + return 0x500; + } + + if (happened & PACA_IRQ_DBELL) { + local_paca->irq_happened &= ~PACA_IRQ_DBELL; + return 0x280; + } + + /* There should be nothing left ! */ + BUG_ON(local_paca->irq_happened != 0); + + return 0; +} +#endif /* CONFIG_PPC_BOOK3E */ + +void replay_soft_interrupts(void) +{ + /* + * We use local_paca rather than get_paca() to avoid all + * the debug_smp_processor_id() business in this low level + * function + */ + unsigned char happened = local_paca->irq_happened; + struct pt_regs regs; + + ppc_save_regs(®s); + regs.softe = IRQS_ALL_DISABLED; + +again: + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + + if (happened & PACA_IRQ_HARD_DIS) { + /* + * We may have missed a decrementer interrupt if hard disabled. + * Check the decrementer register in case we had a rollover + * while hard disabled. + */ + if (!(happened & PACA_IRQ_DEC)) { + if (decrementer_check_overflow()) + happened |= PACA_IRQ_DEC; + } + } + /* * Force the delivery of pending soft-disabled interrupts on PS3. * Any HV call will have this side effect. @@ -182,58 +246,78 @@ notrace unsigned int __check_irq_replay(void) * This is a higher priority interrupt than the others, so * replay it first. */ - if (happened & PACA_IRQ_HMI) { + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_HMI)) { local_paca->irq_happened &= ~PACA_IRQ_HMI; - return 0xe60; + regs.trap = 0xe60; + handle_hmi_exception(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); } if (happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; - return 0x900; - } - - if (happened & PACA_IRQ_PMI) { - local_paca->irq_happened &= ~PACA_IRQ_PMI; - return 0xf00; + regs.trap = 0x900; + timer_interrupt(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); } if (happened & PACA_IRQ_EE) { local_paca->irq_happened &= ~PACA_IRQ_EE; - return 0x500; + regs.trap = 0x500; + do_IRQ(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); } -#ifdef CONFIG_PPC_BOOK3E /* * Check if an EPR external interrupt happened this bit is typically * set if we need to handle another "edge" interrupt from within the * MPIC "EPR" handler. */ - if (happened & PACA_IRQ_EE_EDGE) { + if (IS_ENABLED(CONFIG_PPC_BOOK3E) && (happened & PACA_IRQ_EE_EDGE)) { local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; - return 0x500; + regs.trap = 0x500; + do_IRQ(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); } - if (happened & PACA_IRQ_DBELL) { - local_paca->irq_happened &= ~PACA_IRQ_DBELL; - return 0x280; - } -#else - if (happened & PACA_IRQ_DBELL) { + if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (happened & PACA_IRQ_DBELL)) { local_paca->irq_happened &= ~PACA_IRQ_DBELL; - return 0xa00; + if (IS_ENABLED(CONFIG_PPC_BOOK3E)) + regs.trap = 0x280; + else + regs.trap = 0xa00; + doorbell_exception(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); } -#endif /* CONFIG_PPC_BOOK3E */ - /* There should be nothing left ! */ - BUG_ON(local_paca->irq_happened != 0); + /* Book3E does not support soft-masking PMI interrupts */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_PMI)) { + local_paca->irq_happened &= ~PACA_IRQ_PMI; + regs.trap = 0xf00; + performance_monitor_exception(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } - return 0; + happened = local_paca->irq_happened; + if (happened & ~PACA_IRQ_HARD_DIS) { + /* + * We are responding to the next interrupt, so interrupt-off + * latencies should be reset here. + */ + trace_hardirqs_on(); + trace_hardirqs_off(); + goto again; + } } notrace void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; - unsigned int replay; /* Write the new soft-enabled value */ irq_soft_mask_set(mask); @@ -255,24 +339,16 @@ notrace void arch_local_irq_restore(unsigned long mask) */ irq_happened = get_irq_happened(); if (!irq_happened) { -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON_ONCE(!(mfmsr() & MSR_EE)); -#endif + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!(mfmsr() & MSR_EE)); return; } - /* - * We need to hard disable to get a trusted value from - * __check_irq_replay(). We also need to soft-disable - * again to avoid warnings in there due to the use of - * per-cpu variables. - */ + /* We need to hard disable to replay. */ if (!(irq_happened & PACA_IRQ_HARD_DIS)) { -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON_ONCE(!(mfmsr() & MSR_EE)); -#endif + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!(mfmsr() & MSR_EE)); __hard_irq_disable(); -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG } else { /* * We should already be hard disabled here. We had bugs @@ -280,35 +356,26 @@ notrace void arch_local_irq_restore(unsigned long mask) * warn if we are wrong. Only do that when IRQ tracing * is enabled as mfmsr() can be costly. */ - if (WARN_ON_ONCE(mfmsr() & MSR_EE)) - __hard_irq_disable(); -#endif + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (WARN_ON_ONCE(mfmsr() & MSR_EE)) + __hard_irq_disable(); + } + + if (irq_happened == PACA_IRQ_HARD_DIS) { + local_paca->irq_happened = 0; + __hard_irq_enable(); + return; + } } irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); - /* - * Check if anything needs to be re-emitted. We haven't - * soft-enabled yet to avoid warnings in decrementer_check_overflow - * accessing per-cpu variables - */ - replay = __check_irq_replay(); + replay_soft_interrupts(); + local_paca->irq_happened = 0; - /* We can soft-enable now */ trace_hardirqs_on(); irq_soft_mask_set(IRQS_ENABLED); - - /* - * And replay if we have to. This will return with interrupts - * hard-enabled. - */ - if (replay) { - __replay_interrupt(replay); - return; - } - - /* Finally, let's ensure we are hard enabled */ __hard_irq_enable(); } EXPORT_SYMBOL(arch_local_irq_restore); @@ -599,17 +666,18 @@ u64 arch_irq_stat_cpu(unsigned int cpu) static inline void check_stack_overflow(void) { -#ifdef CONFIG_DEBUG_STACKOVERFLOW long sp; - sp = current_stack_pointer() & (THREAD_SIZE-1); + if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW)) + return; + + sp = current_stack_pointer & (THREAD_SIZE - 1); /* check for stack overflow: is there less than 2KB free? */ if (unlikely(sp < 2048)) { pr_err("do_IRQ: stack overflow: %ld\n", sp); dump_stack(); } -#endif } void __do_irq(struct pt_regs *regs) @@ -647,7 +715,7 @@ void do_IRQ(struct pt_regs *regs) void *cursp, *irqsp, *sirqsp; /* Switch to the irq stack to handle this */ - cursp = (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); + cursp = (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); irqsp = hardirq_ctx[raw_smp_processor_id()]; sirqsp = softirq_ctx[raw_smp_processor_id()]; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2d27ec4feee4a4..81efb605113ea9 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -264,6 +264,9 @@ int kprobe_handler(struct pt_regs *regs) if (user_mode(regs)) return 0; + if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)) + return 0; + /* * We don't want to be preempted for the entire * duration of kprobe processing @@ -271,54 +274,6 @@ int kprobe_handler(struct pt_regs *regs) preempt_disable(); kcb = get_kprobe_ctlblk(); - /* Check we're not actually recursing */ - if (kprobe_running()) { - p = get_kprobe(addr); - if (p) { - kprobe_opcode_t insn = *p->ainsn.insn; - if (kcb->kprobe_status == KPROBE_HIT_SS && - is_trap(insn)) { - /* Turn off 'trace' bits */ - regs->msr &= ~MSR_SINGLESTEP; - regs->msr |= kcb->kprobe_saved_msr; - goto no_kprobe; - } - /* We have reentered the kprobe_handler(), since - * another probe was hit while within the handler. - * We here save the original kprobes variables and - * just single step on the instruction of the new probe - * without calling any user handlers. - */ - save_previous_kprobe(kcb); - set_current_kprobe(p, regs, kcb); - kprobes_inc_nmissed_count(p); - kcb->kprobe_status = KPROBE_REENTER; - if (p->ainsn.boostable >= 0) { - ret = try_to_emulate(p, regs); - - if (ret > 0) { - restore_previous_kprobe(kcb); - preempt_enable_no_resched(); - return 1; - } - } - prepare_singlestep(p, regs); - return 1; - } else if (*addr != BREAKPOINT_INSTRUCTION) { - /* If trap variant, then it belongs not to us */ - kprobe_opcode_t cur_insn = *addr; - - if (is_trap(cur_insn)) - goto no_kprobe; - /* The breakpoint instruction was removed by - * another cpu right after we hit, no further - * handling of this interrupt is appropriate - */ - ret = 1; - } - goto no_kprobe; - } - p = get_kprobe(addr); if (!p) { if (*addr != BREAKPOINT_INSTRUCTION) { @@ -343,6 +298,39 @@ int kprobe_handler(struct pt_regs *regs) goto no_kprobe; } + /* Check we're not actually recursing */ + if (kprobe_running()) { + kprobe_opcode_t insn = *p->ainsn.insn; + if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) { + /* Turn off 'trace' bits */ + regs->msr &= ~MSR_SINGLESTEP; + regs->msr |= kcb->kprobe_saved_msr; + goto no_kprobe; + } + + /* + * We have reentered the kprobe_handler(), since another probe + * was hit while within the handler. We here save the original + * kprobes variables and just single step on the instruction of + * the new probe without calling any user handlers. + */ + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); + kprobes_inc_nmissed_count(p); + kcb->kprobe_status = KPROBE_REENTER; + if (p->ainsn.boostable >= 0) { + ret = try_to_emulate(p, regs); + + if (ret > 0) { + restore_previous_kprobe(kcb); + preempt_enable_no_resched(); + return 1; + } + } + prepare_singlestep(p, regs); + return 1; + } + kcb->kprobe_status = KPROBE_HIT_ACTIVE; set_current_kprobe(p, regs, kcb); if (p->pre_handler && p->pre_handler(p, regs)) { diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 34c1001e9e8bf1..8077b5fb18a79e 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -251,6 +252,19 @@ void machine_check_queue_event(void) /* Queue irq work to process this event later. */ irq_work_queue(&mce_event_process_work); } + +void mce_common_process_ue(struct pt_regs *regs, + struct mce_error_info *mce_err) +{ + const struct exception_table_entry *entry; + + entry = search_kernel_exception_table(regs->nip); + if (entry) { + mce_err->ignore_event = true; + regs->nip = extable_fixup(entry); + } +} + /* * process pending MCE event from the mce event queue. This function will be * called during syscall exit. diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 1cbf7f1a4e3d89..067b094bfeff5e 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -579,14 +579,10 @@ static long mce_handle_ue_error(struct pt_regs *regs, struct mce_error_info *mce_err) { long handled = 0; - const struct exception_table_entry *entry; - entry = search_kernel_exception_table(regs->nip); - if (entry) { - mce_err->ignore_event = true; - regs->nip = extable_fixup(entry); + mce_common_process_ue(regs, mce_err); + if (mce_err->ignore_event) return 1; - } /* * On specific SCOM read via MMIO we may get a machine check diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 974f65f79a8efe..65f9f731c229be 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -110,7 +110,7 @@ _GLOBAL(longjmp) li r3, 1 blr -_GLOBAL(current_stack_pointer) +_GLOBAL(current_stack_frame) PPC_LL r3,0(r1) blr -EXPORT_SYMBOL(current_stack_pointer) +EXPORT_SYMBOL(current_stack_frame) diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 427fc22f72b6bc..71a3f97dc98852 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -62,13 +62,9 @@ static int of_pci_phb_probe(struct platform_device *dev) /* Init pci_dn data structures */ pci_devs_phb_init_dynamic(phb); - /* Create EEH devices for the PHB */ + /* Create EEH PEs for the PHB */ eeh_dev_phb_init_dynamic(phb); - /* Register devices with EEH */ - if (dev->dev.of_node->child) - eeh_add_device_tree_early(PCI_DN(dev->dev.of_node)); - /* Scan the bus */ pcibios_scan_phb(phb); if (phb->bus == NULL) @@ -80,15 +76,9 @@ static int of_pci_phb_probe(struct platform_device *dev) */ pcibios_claim_one_bus(phb->bus); - /* Finish EEH setup */ - eeh_add_device_tree_late(phb->bus); - /* Add probed PCI devices to the device model */ pci_bus_add_devices(phb->bus); - /* sysfs files should only be added after devices are added */ - eeh_add_sysfs_files(phb->bus); - return 0; } diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 949eceb254d859..3f91ccaa9c74fd 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -176,7 +176,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) struct paca_struct **paca_ptrs __read_mostly; EXPORT_SYMBOL(paca_ptrs); -void __init initialise_paca(struct paca_struct *new_paca, int cpu) +void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int cpu) { #ifdef CONFIG_PPC_PSERIES new_paca->lppaca_ptr = NULL; @@ -205,7 +205,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) } /* Put the paca pointer into r13 and SPRG_PACA */ -void setup_paca(struct paca_struct *new_paca) +void __nostackprotector setup_paca(struct paca_struct *new_paca) { /* Setup r13 */ local_paca = new_paca; @@ -214,11 +214,15 @@ void setup_paca(struct paca_struct *new_paca) /* On Book3E, initialize the TLB miss exception frames */ mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); #else - /* In HV mode, we setup both HPACA and PACA to avoid problems + /* + * In HV mode, we setup both HPACA and PACA to avoid problems * if we do a GET_PACA() before the feature fixups have been - * applied + * applied. + * + * Normally you should test against CPU_FTR_HVMODE, but CPU features + * are not yet set up when we first reach here. */ - if (early_cpu_has_feature(CPU_FTR_HVMODE)) + if (mfmsr() & MSR_HV) mtspr(SPRN_SPRG_HPACA, local_paca); #endif mtspr(SPRN_SPRG_PACA, local_paca); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index d0074ad73aa3db..be108616a721fb 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1399,14 +1399,8 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) pci_assign_unassigned_bus_resources(bus); } - /* Fixup EEH */ - eeh_add_device_tree_late(bus); - /* Add new devices to global lists. Register in proc, sysfs. */ pci_bus_add_devices(bus); - - /* sysfs files should only be added after devices are added */ - eeh_add_sysfs_files(bus); } EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus); diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index d6a67f814983d0..bf83f76563a338 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -112,8 +112,6 @@ void pci_hp_add_devices(struct pci_bus *bus) struct pci_controller *phb; struct device_node *dn = pci_bus_to_OF_node(bus); - eeh_add_device_tree_early(PCI_DN(dn)); - phb = pci_bus_to_host(bus); mode = PCI_PROBE_NORMAL; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index fad50db9dcf238..9c21288f864556 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -236,23 +236,9 @@ void enable_kernel_fp(void) } } EXPORT_SYMBOL(enable_kernel_fp); - -static int restore_fp(struct task_struct *tsk) -{ - if (tsk->thread.load_fp) { - load_fp_state(¤t->thread.fp_state); - current->thread.load_fp++; - return 1; - } - return 0; -} -#else -static int restore_fp(struct task_struct *tsk) { return 0; } #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC -#define loadvec(thr) ((thr).load_vec) - static void __giveup_altivec(struct task_struct *tsk) { unsigned long msr; @@ -318,21 +304,6 @@ void flush_altivec_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_altivec_to_thread); - -static int restore_altivec(struct task_struct *tsk) -{ - if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) { - load_vr_state(&tsk->thread.vr_state); - tsk->thread.used_vr = 1; - tsk->thread.load_vec++; - - return 1; - } - return 0; -} -#else -#define loadvec(thr) 0 -static inline int restore_altivec(struct task_struct *tsk) { return 0; } #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX @@ -400,18 +371,6 @@ void flush_vsx_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_vsx_to_thread); - -static int restore_vsx(struct task_struct *tsk) -{ - if (cpu_has_feature(CPU_FTR_VSX)) { - tsk->thread.used_vsr = 1; - return 1; - } - - return 0; -} -#else -static inline int restore_vsx(struct task_struct *tsk) { return 0; } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE @@ -511,6 +470,53 @@ void giveup_all(struct task_struct *tsk) } EXPORT_SYMBOL(giveup_all); +#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_FPU +static int restore_fp(struct task_struct *tsk) +{ + if (tsk->thread.load_fp) { + load_fp_state(¤t->thread.fp_state); + current->thread.load_fp++; + return 1; + } + return 0; +} +#else +static int restore_fp(struct task_struct *tsk) { return 0; } +#endif /* CONFIG_PPC_FPU */ + +#ifdef CONFIG_ALTIVEC +#define loadvec(thr) ((thr).load_vec) +static int restore_altivec(struct task_struct *tsk) +{ + if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) { + load_vr_state(&tsk->thread.vr_state); + tsk->thread.used_vr = 1; + tsk->thread.load_vec++; + + return 1; + } + return 0; +} +#else +#define loadvec(thr) 0 +static inline int restore_altivec(struct task_struct *tsk) { return 0; } +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +static int restore_vsx(struct task_struct *tsk) +{ + if (cpu_has_feature(CPU_FTR_VSX)) { + tsk->thread.used_vsr = 1; + return 1; + } + + return 0; +} +#else +static inline int restore_vsx(struct task_struct *tsk) { return 0; } +#endif /* CONFIG_VSX */ + /* * The exception exit path calls restore_math() with interrupts hard disabled * but the soft irq state not "reconciled". ftrace code that calls @@ -551,6 +557,7 @@ void notrace restore_math(struct pt_regs *regs) regs->msr = msr; } +#endif static void save_all(struct task_struct *tsk) { @@ -1634,11 +1641,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, p->thread.regs = childregs; childregs->gpr[3] = 0; /* Result from fork() */ if (clone_flags & CLONE_SETTLS) { -#ifdef CONFIG_PPC64 if (!is_32bit_task()) childregs->gpr[13] = tls; else -#endif childregs->gpr[2] = tls; } @@ -1976,6 +1981,32 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, return 0; } +static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, + unsigned long nbytes) +{ +#ifdef CONFIG_PPC64 + unsigned long stack_page; + unsigned long cpu = task_cpu(p); + + stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE; + if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + +# ifdef CONFIG_PPC_BOOK3S_64 + stack_page = (unsigned long)paca_ptrs[cpu]->nmi_emergency_sp - THREAD_SIZE; + if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + + stack_page = (unsigned long)paca_ptrs[cpu]->mc_emergency_sp - THREAD_SIZE; + if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; +# endif +#endif + + return 0; +} + + int validate_sp(unsigned long sp, struct task_struct *p, unsigned long nbytes) { @@ -1987,7 +2018,10 @@ int validate_sp(unsigned long sp, struct task_struct *p, if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; - return valid_irq_stack(sp, p, nbytes); + if (valid_irq_stack(sp, p, nbytes)) + return 1; + + return valid_emergency_stack(sp, p, nbytes); } EXPORT_SYMBOL(validate_sp); @@ -2053,7 +2087,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) sp = (unsigned long) stack; if (sp == 0) { if (tsk == current) - sp = current_stack_pointer(); + sp = current_stack_frame(); else sp = tsk->thread.ksp; } diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 577345382b23f9..806be751c3369b 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1773,6 +1773,9 @@ static void __init prom_rtas_os_term(char *str) if (token == 0) prom_panic("Could not get token for ibm,os-term\n"); os_term_args.token = cpu_to_be32(token); + os_term_args.nargs = cpu_to_be32(1); + os_term_args.nret = cpu_to_be32(1); + os_term_args.args[0] = cpu_to_be32(__pa(str)); prom_rtas_hcall((uint64_t)&os_term_args); } #endif /* CONFIG_PPC_SVM */ @@ -3474,7 +3477,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ hdr = dt_header_start; - /* Don't print anything after quiesce under OPAL, it crashes OFW */ prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase); prom_debug("->dt_header_start=0x%lx\n", hdr); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c deleted file mode 100644 index 25c0424e8868b9..00000000000000 --- a/arch/powerpc/kernel/ptrace.c +++ /dev/null @@ -1,3468 +0,0 @@ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Derived from "arch/m68k/kernel/ptrace.c" - * Copyright (C) 1994 by Hamish Macdonald - * Taken from linux/kernel/ptrace.c and modified for M680x0. - * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds - * - * Modified by Cort Dougan (cort@hq.fsmlabs.com) - * and Paul Mackerras (paulus@samba.org). - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define CREATE_TRACE_POINTS -#include - -/* - * The parameter save area on the stack is used to store arguments being passed - * to callee function and is located at fixed offset from stack pointer. - */ -#ifdef CONFIG_PPC32 -#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */ -#else /* CONFIG_PPC32 */ -#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */ -#endif - -struct pt_regs_offset { - const char *name; - int offset; -}; - -#define STR(s) #s /* convert to string */ -#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} -#define GPR_OFFSET_NAME(num) \ - {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \ - {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} -#define REG_OFFSET_END {.name = NULL, .offset = 0} - -#define TVSO(f) (offsetof(struct thread_vr_state, f)) -#define TFSO(f) (offsetof(struct thread_fp_state, f)) -#define TSO(f) (offsetof(struct thread_struct, f)) - -static const struct pt_regs_offset regoffset_table[] = { - GPR_OFFSET_NAME(0), - GPR_OFFSET_NAME(1), - GPR_OFFSET_NAME(2), - GPR_OFFSET_NAME(3), - GPR_OFFSET_NAME(4), - GPR_OFFSET_NAME(5), - GPR_OFFSET_NAME(6), - GPR_OFFSET_NAME(7), - GPR_OFFSET_NAME(8), - GPR_OFFSET_NAME(9), - GPR_OFFSET_NAME(10), - GPR_OFFSET_NAME(11), - GPR_OFFSET_NAME(12), - GPR_OFFSET_NAME(13), - GPR_OFFSET_NAME(14), - GPR_OFFSET_NAME(15), - GPR_OFFSET_NAME(16), - GPR_OFFSET_NAME(17), - GPR_OFFSET_NAME(18), - GPR_OFFSET_NAME(19), - GPR_OFFSET_NAME(20), - GPR_OFFSET_NAME(21), - GPR_OFFSET_NAME(22), - GPR_OFFSET_NAME(23), - GPR_OFFSET_NAME(24), - GPR_OFFSET_NAME(25), - GPR_OFFSET_NAME(26), - GPR_OFFSET_NAME(27), - GPR_OFFSET_NAME(28), - GPR_OFFSET_NAME(29), - GPR_OFFSET_NAME(30), - GPR_OFFSET_NAME(31), - REG_OFFSET_NAME(nip), - REG_OFFSET_NAME(msr), - REG_OFFSET_NAME(ctr), - REG_OFFSET_NAME(link), - REG_OFFSET_NAME(xer), - REG_OFFSET_NAME(ccr), -#ifdef CONFIG_PPC64 - REG_OFFSET_NAME(softe), -#else - REG_OFFSET_NAME(mq), -#endif - REG_OFFSET_NAME(trap), - REG_OFFSET_NAME(dar), - REG_OFFSET_NAME(dsisr), - REG_OFFSET_END, -}; - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -static void flush_tmregs_to_thread(struct task_struct *tsk) -{ - /* - * If task is not current, it will have been flushed already to - * it's thread_struct during __switch_to(). - * - * A reclaim flushes ALL the state or if not in TM save TM SPRs - * in the appropriate thread structures from live. - */ - - if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current)) - return; - - if (MSR_TM_SUSPENDED(mfmsr())) { - tm_reclaim_current(TM_CAUSE_SIGNAL); - } else { - tm_enable(); - tm_save_sprs(&(tsk->thread)); - } -} -#else -static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } -#endif - -/** - * regs_query_register_offset() - query register offset from its name - * @name: the name of a register - * - * regs_query_register_offset() returns the offset of a register in struct - * pt_regs from its name. If the name is invalid, this returns -EINVAL; - */ -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_offset *roff; - for (roff = regoffset_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->offset; - return -EINVAL; -} - -/** - * regs_query_register_name() - query register name from its offset - * @offset: the offset of a register in struct pt_regs. - * - * regs_query_register_name() returns the name of a register from its - * offset in struct pt_regs. If the @offset is invalid, this returns NULL; - */ -const char *regs_query_register_name(unsigned int offset) -{ - const struct pt_regs_offset *roff; - for (roff = regoffset_table; roff->name != NULL; roff++) - if (roff->offset == offset) - return roff->name; - return NULL; -} - -/* - * does not yet catch signals sent when the child dies. - * in exit.c or in signal.c. - */ - -/* - * Set of msr bits that gdb can change on behalf of a process. - */ -#ifdef CONFIG_PPC_ADV_DEBUG_REGS -#define MSR_DEBUGCHANGE 0 -#else -#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE) -#endif - -/* - * Max register writeable via put_reg - */ -#ifdef CONFIG_PPC32 -#define PT_MAX_PUT_REG PT_MQ -#else -#define PT_MAX_PUT_REG PT_CCR -#endif - -static unsigned long get_user_msr(struct task_struct *task) -{ - return task->thread.regs->msr | task->thread.fpexc_mode; -} - -static int set_user_msr(struct task_struct *task, unsigned long msr) -{ - task->thread.regs->msr &= ~MSR_DEBUGCHANGE; - task->thread.regs->msr |= msr & MSR_DEBUGCHANGE; - return 0; -} - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -static unsigned long get_user_ckpt_msr(struct task_struct *task) -{ - return task->thread.ckpt_regs.msr | task->thread.fpexc_mode; -} - -static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr) -{ - task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE; - task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE; - return 0; -} - -static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap) -{ - task->thread.ckpt_regs.trap = trap & 0xfff0; - return 0; -} -#endif - -#ifdef CONFIG_PPC64 -static int get_user_dscr(struct task_struct *task, unsigned long *data) -{ - *data = task->thread.dscr; - return 0; -} - -static int set_user_dscr(struct task_struct *task, unsigned long dscr) -{ - task->thread.dscr = dscr; - task->thread.dscr_inherit = 1; - return 0; -} -#else -static int get_user_dscr(struct task_struct *task, unsigned long *data) -{ - return -EIO; -} - -static int set_user_dscr(struct task_struct *task, unsigned long dscr) -{ - return -EIO; -} -#endif - -/* - * We prevent mucking around with the reserved area of trap - * which are used internally by the kernel. - */ -static int set_user_trap(struct task_struct *task, unsigned long trap) -{ - task->thread.regs->trap = trap & 0xfff0; - return 0; -} - -/* - * Get contents of register REGNO in task TASK. - */ -int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data) -{ - unsigned int regs_max; - - if ((task->thread.regs == NULL) || !data) - return -EIO; - - if (regno == PT_MSR) { - *data = get_user_msr(task); - return 0; - } - - if (regno == PT_DSCR) - return get_user_dscr(task, data); - -#ifdef CONFIG_PPC64 - /* - * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is - * no more used as a flag, lets force usr to alway see the softe value as 1 - * which means interrupts are not soft disabled. - */ - if (regno == PT_SOFTE) { - *data = 1; - return 0; - } -#endif - - regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long); - if (regno < regs_max) { - regno = array_index_nospec(regno, regs_max); - *data = ((unsigned long *)task->thread.regs)[regno]; - return 0; - } - - return -EIO; -} - -/* - * Write contents of register REGNO in task TASK. - */ -int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) -{ - if (task->thread.regs == NULL) - return -EIO; - - if (regno == PT_MSR) - return set_user_msr(task, data); - if (regno == PT_TRAP) - return set_user_trap(task, data); - if (regno == PT_DSCR) - return set_user_dscr(task, data); - - if (regno <= PT_MAX_PUT_REG) { - regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1); - ((unsigned long *)task->thread.regs)[regno] = data; - return 0; - } - return -EIO; -} - -static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int i, ret; - - if (target->thread.regs == NULL) - return -EIO; - - if (!FULL_REGS(target->thread.regs)) { - /* We have a partial register set. Fill 14-31 with bogus values */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - target->thread.regs, - 0, offsetof(struct pt_regs, msr)); - if (!ret) { - unsigned long msr = get_user_msr(target); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, - offsetof(struct pt_regs, msr), - offsetof(struct pt_regs, msr) + - sizeof(msr)); - } - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); - - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->orig_gpr3, - offsetof(struct pt_regs, orig_gpr3), - sizeof(struct user_pt_regs)); - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct user_pt_regs), -1); - - return ret; -} - -static int gpr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - unsigned long reg; - int ret; - - if (target->thread.regs == NULL) - return -EIO; - - CHECK_FULL_REGS(target->thread.regs); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - target->thread.regs, - 0, PT_MSR * sizeof(reg)); - - if (!ret && count > 0) { - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, - PT_MSR * sizeof(reg), - (PT_MSR + 1) * sizeof(reg)); - if (!ret) - ret = set_user_msr(target, reg); - } - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->orig_gpr3, - PT_ORIG_R3 * sizeof(reg), - (PT_MAX_PUT_REG + 1) * sizeof(reg)); - - if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) - ret = user_regset_copyin_ignore( - &pos, &count, &kbuf, &ubuf, - (PT_MAX_PUT_REG + 1) * sizeof(reg), - PT_TRAP * sizeof(reg)); - - if (!ret && count > 0) { - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, - PT_TRAP * sizeof(reg), - (PT_TRAP + 1) * sizeof(reg)); - if (!ret) - ret = set_user_trap(target, reg); - } - - if (!ret) - ret = user_regset_copyin_ignore( - &pos, &count, &kbuf, &ubuf, - (PT_TRAP + 1) * sizeof(reg), -1); - - return ret; -} - -/* - * Regardless of transactions, 'fp_state' holds the current running - * value of all FPR registers and 'ckfp_state' holds the last checkpointed - * value of all FPR registers for the current transaction. - * - * Userspace interface buffer layout: - * - * struct data { - * u64 fpr[32]; - * u64 fpscr; - * }; - */ -static int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ -#ifdef CONFIG_VSX - u64 buf[33]; - int i; - - flush_fp_to_thread(target); - - /* copy to local buffer then write that out */ - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.TS_FPR(i); - buf[32] = target->thread.fp_state.fpscr; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); -#else - BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != - offsetof(struct thread_fp_state, fpr[32])); - - flush_fp_to_thread(target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fp_state, 0, -1); -#endif -} - -/* - * Regardless of transactions, 'fp_state' holds the current running - * value of all FPR registers and 'ckfp_state' holds the last checkpointed - * value of all FPR registers for the current transaction. - * - * Userspace interface buffer layout: - * - * struct data { - * u64 fpr[32]; - * u64 fpscr; - * }; - * - */ -static int fpr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ -#ifdef CONFIG_VSX - u64 buf[33]; - int i; - - flush_fp_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.TS_FPR(i); - buf[32] = target->thread.fp_state.fpscr; - - /* copy to local buffer then write that out */ - i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); - if (i) - return i; - - for (i = 0; i < 32 ; i++) - target->thread.TS_FPR(i) = buf[i]; - target->thread.fp_state.fpscr = buf[32]; - return 0; -#else - BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != - offsetof(struct thread_fp_state, fpr[32])); - - flush_fp_to_thread(target); - - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fp_state, 0, -1); -#endif -} - -#ifdef CONFIG_ALTIVEC -/* - * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. - * The transfer totals 34 quadword. Quadwords 0-31 contain the - * corresponding vector registers. Quadword 32 contains the vscr as the - * last word (offset 12) within that quadword. Quadword 33 contains the - * vrsave as the first word (offset 0) within the quadword. - * - * This definition of the VMX state is compatible with the current PPC32 - * ptrace interface. This allows signal handling and ptrace to use the - * same structures. This also simplifies the implementation of a bi-arch - * (combined (32- and 64-bit) gdb. - */ - -static int vr_active(struct task_struct *target, - const struct user_regset *regset) -{ - flush_altivec_to_thread(target); - return target->thread.used_vr ? regset->n : 0; -} - -/* - * Regardless of transactions, 'vr_state' holds the current running - * value of all the VMX registers and 'ckvr_state' holds the last - * checkpointed value of all the VMX registers for the current - * transaction to fall back on in case it aborts. - * - * Userspace interface buffer layout: - * - * struct data { - * vector128 vr[32]; - * vector128 vscr; - * vector128 vrsave; - * }; - */ -static int vr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - flush_altivec_to_thread(target); - - BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != - offsetof(struct thread_vr_state, vr[32])); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.vr_state, 0, - 33 * sizeof(vector128)); - if (!ret) { - /* - * Copy out only the low-order word of vrsave. - */ - int start, end; - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - - vrsave.word = target->thread.vrsave; - - start = 33 * sizeof(vector128); - end = start + sizeof(vrsave); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, - start, end); - } - - return ret; -} - -/* - * Regardless of transactions, 'vr_state' holds the current running - * value of all the VMX registers and 'ckvr_state' holds the last - * checkpointed value of all the VMX registers for the current - * transaction to fall back on in case it aborts. - * - * Userspace interface buffer layout: - * - * struct data { - * vector128 vr[32]; - * vector128 vscr; - * vector128 vrsave; - * }; - */ -static int vr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - flush_altivec_to_thread(target); - - BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != - offsetof(struct thread_vr_state, vr[32])); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.vr_state, 0, - 33 * sizeof(vector128)); - if (!ret && count > 0) { - /* - * We use only the first word of vrsave. - */ - int start, end; - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - - vrsave.word = target->thread.vrsave; - - start = 33 * sizeof(vector128); - end = start + sizeof(vrsave); - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, - start, end); - if (!ret) - target->thread.vrsave = vrsave.word; - } - - return ret; -} -#endif /* CONFIG_ALTIVEC */ - -#ifdef CONFIG_VSX -/* - * Currently to set and and get all the vsx state, you need to call - * the fp and VMX calls as well. This only get/sets the lower 32 - * 128bit VSX registers. - */ - -static int vsr_active(struct task_struct *target, - const struct user_regset *regset) -{ - flush_vsx_to_thread(target); - return target->thread.used_vsr ? regset->n : 0; -} - -/* - * Regardless of transactions, 'fp_state' holds the current running - * value of all FPR registers and 'ckfp_state' holds the last - * checkpointed value of all FPR registers for the current - * transaction. - * - * Userspace interface buffer layout: - * - * struct data { - * u64 vsx[32]; - * }; - */ -static int vsr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - u64 buf[32]; - int ret, i; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - flush_vsx_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - - return ret; -} - -/* - * Regardless of transactions, 'fp_state' holds the current running - * value of all FPR registers and 'ckfp_state' holds the last - * checkpointed value of all FPR registers for the current - * transaction. - * - * Userspace interface buffer layout: - * - * struct data { - * u64 vsx[32]; - * }; - */ -static int vsr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - u64 buf[32]; - int ret,i; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - flush_vsx_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - if (!ret) - for (i = 0; i < 32 ; i++) - target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; - - return ret; -} -#endif /* CONFIG_VSX */ - -#ifdef CONFIG_SPE - -/* - * For get_evrregs/set_evrregs functions 'data' has the following layout: - * - * struct { - * u32 evr[32]; - * u64 acc; - * u32 spefscr; - * } - */ - -static int evr_active(struct task_struct *target, - const struct user_regset *regset) -{ - flush_spe_to_thread(target); - return target->thread.used_spe ? regset->n : 0; -} - -static int evr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - flush_spe_to_thread(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.evr, - 0, sizeof(target->thread.evr)); - - BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != - offsetof(struct thread_struct, spefscr)); - - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.acc, - sizeof(target->thread.evr), -1); - - return ret; -} - -static int evr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - flush_spe_to_thread(target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.evr, - 0, sizeof(target->thread.evr)); - - BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != - offsetof(struct thread_struct, spefscr)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.acc, - sizeof(target->thread.evr), -1); - - return ret; -} -#endif /* CONFIG_SPE */ - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/** - * tm_cgpr_active - get active number of registers in CGPR - * @target: The target task. - * @regset: The user regset structure. - * - * This function checks for the active number of available - * regisers in transaction checkpointed GPR category. - */ -static int tm_cgpr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return 0; - - return regset->n; -} - -/** - * tm_cgpr_get - get CGPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. - * - * This function gets transaction checkpointed GPR registers. - * - * When the transaction is active, 'ckpt_regs' holds all the checkpointed - * GPR register values for the current transaction to fall back on if it - * aborts in between. This function gets those checkpointed GPR registers. - * The userspace interface buffer layout is as follows. - * - * struct data { - * struct pt_regs ckpt_regs; - * }; - */ -static int tm_cgpr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs, - 0, offsetof(struct pt_regs, msr)); - if (!ret) { - unsigned long msr = get_user_ckpt_msr(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, - offsetof(struct pt_regs, msr), - offsetof(struct pt_regs, msr) + - sizeof(msr)); - } - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); - - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs.orig_gpr3, - offsetof(struct pt_regs, orig_gpr3), - sizeof(struct user_pt_regs)); - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct user_pt_regs), -1); - - return ret; -} - -/* - * tm_cgpr_set - set the CGPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets in transaction checkpointed GPR registers. - * - * When the transaction is active, 'ckpt_regs' holds the checkpointed - * GPR register values for the current transaction to fall back on if it - * aborts in between. This function sets those checkpointed GPR registers. - * The userspace interface buffer layout is as follows. - * - * struct data { - * struct pt_regs ckpt_regs; - * }; - */ -static int tm_cgpr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - unsigned long reg; - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs, - 0, PT_MSR * sizeof(reg)); - - if (!ret && count > 0) { - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, - PT_MSR * sizeof(reg), - (PT_MSR + 1) * sizeof(reg)); - if (!ret) - ret = set_user_ckpt_msr(target, reg); - } - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs.orig_gpr3, - PT_ORIG_R3 * sizeof(reg), - (PT_MAX_PUT_REG + 1) * sizeof(reg)); - - if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) - ret = user_regset_copyin_ignore( - &pos, &count, &kbuf, &ubuf, - (PT_MAX_PUT_REG + 1) * sizeof(reg), - PT_TRAP * sizeof(reg)); - - if (!ret && count > 0) { - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, - PT_TRAP * sizeof(reg), - (PT_TRAP + 1) * sizeof(reg)); - if (!ret) - ret = set_user_ckpt_trap(target, reg); - } - - if (!ret) - ret = user_regset_copyin_ignore( - &pos, &count, &kbuf, &ubuf, - (PT_TRAP + 1) * sizeof(reg), -1); - - return ret; -} - -/** - * tm_cfpr_active - get active number of registers in CFPR - * @target: The target task. - * @regset: The user regset structure. - * - * This function checks for the active number of available - * regisers in transaction checkpointed FPR category. - */ -static int tm_cfpr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return 0; - - return regset->n; -} - -/** - * tm_cfpr_get - get CFPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. - * - * This function gets in transaction checkpointed FPR registers. - * - * When the transaction is active 'ckfp_state' holds the checkpointed - * values for the current transaction to fall back on if it aborts - * in between. This function gets those checkpointed FPR registers. - * The userspace interface buffer layout is as follows. - * - * struct data { - * u64 fpr[32]; - * u64 fpscr; - *}; - */ -static int tm_cfpr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - u64 buf[33]; - int i; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - /* copy to local buffer then write that out */ - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.TS_CKFPR(i); - buf[32] = target->thread.ckfp_state.fpscr; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); -} - -/** - * tm_cfpr_set - set CFPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets in transaction checkpointed FPR registers. - * - * When the transaction is active 'ckfp_state' holds the checkpointed - * FPR register values for the current transaction to fall back on - * if it aborts in between. This function sets these checkpointed - * FPR registers. The userspace interface buffer layout is as follows. - * - * struct data { - * u64 fpr[32]; - * u64 fpscr; - *}; - */ -static int tm_cfpr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - u64 buf[33]; - int i; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - for (i = 0; i < 32; i++) - buf[i] = target->thread.TS_CKFPR(i); - buf[32] = target->thread.ckfp_state.fpscr; - - /* copy to local buffer then write that out */ - i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); - if (i) - return i; - for (i = 0; i < 32 ; i++) - target->thread.TS_CKFPR(i) = buf[i]; - target->thread.ckfp_state.fpscr = buf[32]; - return 0; -} - -/** - * tm_cvmx_active - get active number of registers in CVMX - * @target: The target task. - * @regset: The user regset structure. - * - * This function checks for the active number of available - * regisers in checkpointed VMX category. - */ -static int tm_cvmx_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return 0; - - return regset->n; -} - -/** - * tm_cvmx_get - get CMVX registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. - * - * This function gets in transaction checkpointed VMX registers. - * - * When the transaction is active 'ckvr_state' and 'ckvrsave' hold - * the checkpointed values for the current transaction to fall - * back on if it aborts in between. The userspace interface buffer - * layout is as follows. - * - * struct data { - * vector128 vr[32]; - * vector128 vscr; - * vector128 vrsave; - *}; - */ -static int tm_cvmx_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - /* Flush the state */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ckvr_state, 0, - 33 * sizeof(vector128)); - if (!ret) { - /* - * Copy out only the low-order word of vrsave. - */ - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - vrsave.word = target->thread.ckvrsave; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, - 33 * sizeof(vector128), -1); - } - - return ret; -} - -/** - * tm_cvmx_set - set CMVX registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets in transaction checkpointed VMX registers. - * - * When the transaction is active 'ckvr_state' and 'ckvrsave' hold - * the checkpointed values for the current transaction to fall - * back on if it aborts in between. The userspace interface buffer - * layout is as follows. - * - * struct data { - * vector128 vr[32]; - * vector128 vscr; - * vector128 vrsave; - *}; - */ -static int tm_cvmx_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ckvr_state, 0, - 33 * sizeof(vector128)); - if (!ret && count > 0) { - /* - * We use only the low-order word of vrsave. - */ - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - vrsave.word = target->thread.ckvrsave; - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, - 33 * sizeof(vector128), -1); - if (!ret) - target->thread.ckvrsave = vrsave.word; - } - - return ret; -} - -/** - * tm_cvsx_active - get active number of registers in CVSX - * @target: The target task. - * @regset: The user regset structure. - * - * This function checks for the active number of available - * regisers in transaction checkpointed VSX category. - */ -static int tm_cvsx_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return 0; - - flush_vsx_to_thread(target); - return target->thread.used_vsr ? regset->n : 0; -} - -/** - * tm_cvsx_get - get CVSX registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. - * - * This function gets in transaction checkpointed VSX registers. - * - * When the transaction is active 'ckfp_state' holds the checkpointed - * values for the current transaction to fall back on if it aborts - * in between. This function gets those checkpointed VSX registers. - * The userspace interface buffer layout is as follows. - * - * struct data { - * u64 vsx[32]; - *}; - */ -static int tm_cvsx_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - u64 buf[32]; - int ret, i; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - /* Flush the state */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - flush_vsx_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - - return ret; -} - -/** - * tm_cvsx_set - set CFPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets in transaction checkpointed VSX registers. - * - * When the transaction is active 'ckfp_state' holds the checkpointed - * VSX register values for the current transaction to fall back on - * if it aborts in between. This function sets these checkpointed - * FPR registers. The userspace interface buffer layout is as follows. - * - * struct data { - * u64 vsx[32]; - *}; - */ -static int tm_cvsx_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - u64 buf[32]; - int ret, i; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - /* Flush the state */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - flush_vsx_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - if (!ret) - for (i = 0; i < 32 ; i++) - target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; - - return ret; -} - -/** - * tm_spr_active - get active number of registers in TM SPR - * @target: The target task. - * @regset: The user regset structure. - * - * This function checks the active number of available - * regisers in the transactional memory SPR category. - */ -static int tm_spr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - return regset->n; -} - -/** - * tm_spr_get - get the TM related SPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. - * - * This function gets transactional memory related SPR registers. - * The userspace interface buffer layout is as follows. - * - * struct { - * u64 tm_tfhar; - * u64 tm_texasr; - * u64 tm_tfiar; - * }; - */ -static int tm_spr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - /* Build tests */ - BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); - BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); - BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - /* Flush the states */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - /* TFHAR register */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfhar, 0, sizeof(u64)); - - /* TEXASR register */ - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_texasr, sizeof(u64), - 2 * sizeof(u64)); - - /* TFIAR register */ - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfiar, - 2 * sizeof(u64), 3 * sizeof(u64)); - return ret; -} - -/** - * tm_spr_set - set the TM related SPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets transactional memory related SPR registers. - * The userspace interface buffer layout is as follows. - * - * struct { - * u64 tm_tfhar; - * u64 tm_texasr; - * u64 tm_tfiar; - * }; - */ -static int tm_spr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - /* Build tests */ - BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); - BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); - BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - /* Flush the states */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - /* TFHAR register */ - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfhar, 0, sizeof(u64)); - - /* TEXASR register */ - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_texasr, sizeof(u64), - 2 * sizeof(u64)); - - /* TFIAR register */ - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfiar, - 2 * sizeof(u64), 3 * sizeof(u64)); - return ret; -} - -static int tm_tar_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (MSR_TM_ACTIVE(target->thread.regs->msr)) - return regset->n; - - return 0; -} - -static int tm_tar_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tar, 0, sizeof(u64)); - return ret; -} - -static int tm_tar_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tar, 0, sizeof(u64)); - return ret; -} - -static int tm_ppr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (MSR_TM_ACTIVE(target->thread.regs->msr)) - return regset->n; - - return 0; -} - - -static int tm_ppr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_ppr, 0, sizeof(u64)); - return ret; -} - -static int tm_ppr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_ppr, 0, sizeof(u64)); - return ret; -} - -static int tm_dscr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (MSR_TM_ACTIVE(target->thread.regs->msr)) - return regset->n; - - return 0; -} - -static int tm_dscr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_dscr, 0, sizeof(u64)); - return ret; -} - -static int tm_dscr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_dscr, 0, sizeof(u64)); - return ret; -} -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - -#ifdef CONFIG_PPC64 -static int ppr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->ppr, 0, sizeof(u64)); -} - -static int ppr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->ppr, 0, sizeof(u64)); -} - -static int dscr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.dscr, 0, sizeof(u64)); -} -static int dscr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.dscr, 0, sizeof(u64)); -} -#endif -#ifdef CONFIG_PPC_BOOK3S_64 -static int tar_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tar, 0, sizeof(u64)); -} -static int tar_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tar, 0, sizeof(u64)); -} - -static int ebb_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - if (target->thread.used_ebb) - return regset->n; - - return 0; -} - -static int ebb_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - /* Build tests */ - BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); - BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - if (!target->thread.used_ebb) - return -ENODATA; - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ebbrr, 0, 3 * sizeof(unsigned long)); -} - -static int ebb_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret = 0; - - /* Build tests */ - BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); - BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - if (target->thread.used_ebb) - return -ENODATA; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ebbrr, 0, sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ebbhr, sizeof(unsigned long), - 2 * sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.bescr, - 2 * sizeof(unsigned long), 3 * sizeof(unsigned long)); - - return ret; -} -static int pmu_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - return regset->n; -} - -static int pmu_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - /* Build tests */ - BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); - BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); - BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); - BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.siar, 0, - 5 * sizeof(unsigned long)); -} - -static int pmu_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret = 0; - - /* Build tests */ - BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); - BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); - BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); - BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.siar, 0, - sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.sdar, sizeof(unsigned long), - 2 * sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.sier, 2 * sizeof(unsigned long), - 3 * sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.mmcr2, 3 * sizeof(unsigned long), - 4 * sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.mmcr0, 4 * sizeof(unsigned long), - 5 * sizeof(unsigned long)); - return ret; -} -#endif - -#ifdef CONFIG_PPC_MEM_KEYS -static int pkey_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!arch_pkeys_enabled()) - return -ENODEV; - - return regset->n; -} - -static int pkey_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); - BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); - - if (!arch_pkeys_enabled()) - return -ENODEV; - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.amr, 0, - ELF_NPKEY * sizeof(unsigned long)); -} - -static int pkey_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - u64 new_amr; - int ret; - - if (!arch_pkeys_enabled()) - return -ENODEV; - - /* Only the AMR can be set from userspace */ - if (pos != 0 || count != sizeof(new_amr)) - return -EINVAL; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &new_amr, 0, sizeof(new_amr)); - if (ret) - return ret; - - /* UAMOR determines which bits of the AMR can be set from userspace. */ - target->thread.amr = (new_amr & target->thread.uamor) | - (target->thread.amr & ~target->thread.uamor); - - return 0; -} -#endif /* CONFIG_PPC_MEM_KEYS */ - -/* - * These are our native regset flavors. - */ -enum powerpc_regset { - REGSET_GPR, - REGSET_FPR, -#ifdef CONFIG_ALTIVEC - REGSET_VMX, -#endif -#ifdef CONFIG_VSX - REGSET_VSX, -#endif -#ifdef CONFIG_SPE - REGSET_SPE, -#endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - REGSET_TM_CGPR, /* TM checkpointed GPR registers */ - REGSET_TM_CFPR, /* TM checkpointed FPR registers */ - REGSET_TM_CVMX, /* TM checkpointed VMX registers */ - REGSET_TM_CVSX, /* TM checkpointed VSX registers */ - REGSET_TM_SPR, /* TM specific SPR registers */ - REGSET_TM_CTAR, /* TM checkpointed TAR register */ - REGSET_TM_CPPR, /* TM checkpointed PPR register */ - REGSET_TM_CDSCR, /* TM checkpointed DSCR register */ -#endif -#ifdef CONFIG_PPC64 - REGSET_PPR, /* PPR register */ - REGSET_DSCR, /* DSCR register */ -#endif -#ifdef CONFIG_PPC_BOOK3S_64 - REGSET_TAR, /* TAR register */ - REGSET_EBB, /* EBB registers */ - REGSET_PMR, /* Performance Monitor Registers */ -#endif -#ifdef CONFIG_PPC_MEM_KEYS - REGSET_PKEY, /* AMR register */ -#endif -}; - -static const struct user_regset native_regsets[] = { - [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, - .size = sizeof(long), .align = sizeof(long), - .get = gpr_get, .set = gpr_set - }, - [REGSET_FPR] = { - .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, - .size = sizeof(double), .align = sizeof(double), - .get = fpr_get, .set = fpr_set - }, -#ifdef CONFIG_ALTIVEC - [REGSET_VMX] = { - .core_note_type = NT_PPC_VMX, .n = 34, - .size = sizeof(vector128), .align = sizeof(vector128), - .active = vr_active, .get = vr_get, .set = vr_set - }, -#endif -#ifdef CONFIG_VSX - [REGSET_VSX] = { - .core_note_type = NT_PPC_VSX, .n = 32, - .size = sizeof(double), .align = sizeof(double), - .active = vsr_active, .get = vsr_get, .set = vsr_set - }, -#endif -#ifdef CONFIG_SPE - [REGSET_SPE] = { - .core_note_type = NT_PPC_SPE, .n = 35, - .size = sizeof(u32), .align = sizeof(u32), - .active = evr_active, .get = evr_get, .set = evr_set - }, -#endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - [REGSET_TM_CGPR] = { - .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, - .size = sizeof(long), .align = sizeof(long), - .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set - }, - [REGSET_TM_CFPR] = { - .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, - .size = sizeof(double), .align = sizeof(double), - .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set - }, - [REGSET_TM_CVMX] = { - .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, - .size = sizeof(vector128), .align = sizeof(vector128), - .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set - }, - [REGSET_TM_CVSX] = { - .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, - .size = sizeof(double), .align = sizeof(double), - .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set - }, - [REGSET_TM_SPR] = { - .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set - }, - [REGSET_TM_CTAR] = { - .core_note_type = NT_PPC_TM_CTAR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set - }, - [REGSET_TM_CPPR] = { - .core_note_type = NT_PPC_TM_CPPR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set - }, - [REGSET_TM_CDSCR] = { - .core_note_type = NT_PPC_TM_CDSCR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set - }, -#endif -#ifdef CONFIG_PPC64 - [REGSET_PPR] = { - .core_note_type = NT_PPC_PPR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = ppr_get, .set = ppr_set - }, - [REGSET_DSCR] = { - .core_note_type = NT_PPC_DSCR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = dscr_get, .set = dscr_set - }, -#endif -#ifdef CONFIG_PPC_BOOK3S_64 - [REGSET_TAR] = { - .core_note_type = NT_PPC_TAR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = tar_get, .set = tar_set - }, - [REGSET_EBB] = { - .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, - .size = sizeof(u64), .align = sizeof(u64), - .active = ebb_active, .get = ebb_get, .set = ebb_set - }, - [REGSET_PMR] = { - .core_note_type = NT_PPC_PMU, .n = ELF_NPMU, - .size = sizeof(u64), .align = sizeof(u64), - .active = pmu_active, .get = pmu_get, .set = pmu_set - }, -#endif -#ifdef CONFIG_PPC_MEM_KEYS - [REGSET_PKEY] = { - .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, - .size = sizeof(u64), .align = sizeof(u64), - .active = pkey_active, .get = pkey_get, .set = pkey_set - }, -#endif -}; - -static const struct user_regset_view user_ppc_native_view = { - .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI, - .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) -}; - -#ifdef CONFIG_PPC64 -#include - -static int gpr32_get_common(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, - unsigned long *regs) -{ - compat_ulong_t *k = kbuf; - compat_ulong_t __user *u = ubuf; - compat_ulong_t reg; - - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) - for (; count > 0 && pos < PT_MSR; --count) - *k++ = regs[pos++]; - else - for (; count > 0 && pos < PT_MSR; --count) - if (__put_user((compat_ulong_t) regs[pos++], u++)) - return -EFAULT; - - if (count > 0 && pos == PT_MSR) { - reg = get_user_msr(target); - if (kbuf) - *k++ = reg; - else if (__put_user(reg, u++)) - return -EFAULT; - ++pos; - --count; - } - - if (kbuf) - for (; count > 0 && pos < PT_REGS_COUNT; --count) - *k++ = regs[pos++]; - else - for (; count > 0 && pos < PT_REGS_COUNT; --count) - if (__put_user((compat_ulong_t) regs[pos++], u++)) - return -EFAULT; - - kbuf = k; - ubuf = u; - pos *= sizeof(reg); - count *= sizeof(reg); - return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - PT_REGS_COUNT * sizeof(reg), -1); -} - -static int gpr32_set_common(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf, - unsigned long *regs) -{ - const compat_ulong_t *k = kbuf; - const compat_ulong_t __user *u = ubuf; - compat_ulong_t reg; - - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) - for (; count > 0 && pos < PT_MSR; --count) - regs[pos++] = *k++; - else - for (; count > 0 && pos < PT_MSR; --count) { - if (__get_user(reg, u++)) - return -EFAULT; - regs[pos++] = reg; - } - - - if (count > 0 && pos == PT_MSR) { - if (kbuf) - reg = *k++; - else if (__get_user(reg, u++)) - return -EFAULT; - set_user_msr(target, reg); - ++pos; - --count; - } - - if (kbuf) { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) - regs[pos++] = *k++; - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - ++k; - } else { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { - if (__get_user(reg, u++)) - return -EFAULT; - regs[pos++] = reg; - } - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - if (__get_user(reg, u++)) - return -EFAULT; - } - - if (count > 0 && pos == PT_TRAP) { - if (kbuf) - reg = *k++; - else if (__get_user(reg, u++)) - return -EFAULT; - set_user_trap(target, reg); - ++pos; - --count; - } - - kbuf = k; - ubuf = u; - pos *= sizeof(reg); - count *= sizeof(reg); - return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - (PT_TRAP + 1) * sizeof(reg), -1); -} - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -static int tm_cgpr32_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, - &target->thread.ckpt_regs.gpr[0]); -} - -static int tm_cgpr32_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, - &target->thread.ckpt_regs.gpr[0]); -} -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - -static int gpr32_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int i; - - if (target->thread.regs == NULL) - return -EIO; - - if (!FULL_REGS(target->thread.regs)) { - /* - * We have a partial register set. - * Fill 14-31 with bogus values. - */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } - return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, - &target->thread.regs->gpr[0]); -} - -static int gpr32_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - if (target->thread.regs == NULL) - return -EIO; - - CHECK_FULL_REGS(target->thread.regs); - return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, - &target->thread.regs->gpr[0]); -} - -/* - * These are the regset flavors matching the CONFIG_PPC32 native set. - */ -static const struct user_regset compat_regsets[] = { - [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, - .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = gpr32_get, .set = gpr32_set - }, - [REGSET_FPR] = { - .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, - .size = sizeof(double), .align = sizeof(double), - .get = fpr_get, .set = fpr_set - }, -#ifdef CONFIG_ALTIVEC - [REGSET_VMX] = { - .core_note_type = NT_PPC_VMX, .n = 34, - .size = sizeof(vector128), .align = sizeof(vector128), - .active = vr_active, .get = vr_get, .set = vr_set - }, -#endif -#ifdef CONFIG_SPE - [REGSET_SPE] = { - .core_note_type = NT_PPC_SPE, .n = 35, - .size = sizeof(u32), .align = sizeof(u32), - .active = evr_active, .get = evr_get, .set = evr_set - }, -#endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - [REGSET_TM_CGPR] = { - .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, - .size = sizeof(long), .align = sizeof(long), - .active = tm_cgpr_active, - .get = tm_cgpr32_get, .set = tm_cgpr32_set - }, - [REGSET_TM_CFPR] = { - .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, - .size = sizeof(double), .align = sizeof(double), - .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set - }, - [REGSET_TM_CVMX] = { - .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, - .size = sizeof(vector128), .align = sizeof(vector128), - .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set - }, - [REGSET_TM_CVSX] = { - .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, - .size = sizeof(double), .align = sizeof(double), - .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set - }, - [REGSET_TM_SPR] = { - .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set - }, - [REGSET_TM_CTAR] = { - .core_note_type = NT_PPC_TM_CTAR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set - }, - [REGSET_TM_CPPR] = { - .core_note_type = NT_PPC_TM_CPPR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set - }, - [REGSET_TM_CDSCR] = { - .core_note_type = NT_PPC_TM_CDSCR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set - }, -#endif -#ifdef CONFIG_PPC64 - [REGSET_PPR] = { - .core_note_type = NT_PPC_PPR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = ppr_get, .set = ppr_set - }, - [REGSET_DSCR] = { - .core_note_type = NT_PPC_DSCR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = dscr_get, .set = dscr_set - }, -#endif -#ifdef CONFIG_PPC_BOOK3S_64 - [REGSET_TAR] = { - .core_note_type = NT_PPC_TAR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = tar_get, .set = tar_set - }, - [REGSET_EBB] = { - .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, - .size = sizeof(u64), .align = sizeof(u64), - .active = ebb_active, .get = ebb_get, .set = ebb_set - }, -#endif -}; - -static const struct user_regset_view user_ppc_compat_view = { - .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI, - .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets) -}; -#endif /* CONFIG_PPC64 */ - -const struct user_regset_view *task_user_regset_view(struct task_struct *task) -{ -#ifdef CONFIG_PPC64 - if (test_tsk_thread_flag(task, TIF_32BIT)) - return &user_ppc_compat_view; -#endif - return &user_ppc_native_view; -} - - -void user_enable_single_step(struct task_struct *task) -{ - struct pt_regs *regs = task->thread.regs; - - if (regs != NULL) { -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - task->thread.debug.dbcr0 &= ~DBCR0_BT; - task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; - regs->msr |= MSR_DE; -#else - regs->msr &= ~MSR_BE; - regs->msr |= MSR_SE; -#endif - } - set_tsk_thread_flag(task, TIF_SINGLESTEP); -} - -void user_enable_block_step(struct task_struct *task) -{ - struct pt_regs *regs = task->thread.regs; - - if (regs != NULL) { -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - task->thread.debug.dbcr0 &= ~DBCR0_IC; - task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT; - regs->msr |= MSR_DE; -#else - regs->msr &= ~MSR_SE; - regs->msr |= MSR_BE; -#endif - } - set_tsk_thread_flag(task, TIF_SINGLESTEP); -} - -void user_disable_single_step(struct task_struct *task) -{ - struct pt_regs *regs = task->thread.regs; - - if (regs != NULL) { -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - /* - * The logic to disable single stepping should be as - * simple as turning off the Instruction Complete flag. - * And, after doing so, if all debug flags are off, turn - * off DBCR0(IDM) and MSR(DE) .... Torez - */ - task->thread.debug.dbcr0 &= ~(DBCR0_IC|DBCR0_BT); - /* - * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set. - */ - if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, - task->thread.debug.dbcr1)) { - /* - * All debug events were off..... - */ - task->thread.debug.dbcr0 &= ~DBCR0_IDM; - regs->msr &= ~MSR_DE; - } -#else - regs->msr &= ~(MSR_SE | MSR_BE); -#endif - } - clear_tsk_thread_flag(task, TIF_SINGLESTEP); -} - -#ifdef CONFIG_HAVE_HW_BREAKPOINT -void ptrace_triggered(struct perf_event *bp, - struct perf_sample_data *data, struct pt_regs *regs) -{ - struct perf_event_attr attr; - - /* - * Disable the breakpoint request here since ptrace has defined a - * one-shot behaviour for breakpoint exceptions in PPC64. - * The SIGTRAP signal is generated automatically for us in do_dabr(). - * We don't have to do anything about that here - */ - attr = bp->attr; - attr.disabled = true; - modify_user_hw_breakpoint(bp, &attr); -} -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ - -static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, - unsigned long data) -{ -#ifdef CONFIG_HAVE_HW_BREAKPOINT - int ret; - struct thread_struct *thread = &(task->thread); - struct perf_event *bp; - struct perf_event_attr attr; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#ifndef CONFIG_PPC_ADV_DEBUG_REGS - bool set_bp = true; - struct arch_hw_breakpoint hw_brk; -#endif - - /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). - * For embedded processors we support one DAC and no IAC's at the - * moment. - */ - if (addr > 0) - return -EINVAL; - - /* The bottom 3 bits in dabr are flags */ - if ((data & ~0x7UL) >= TASK_SIZE) - return -EIO; - -#ifndef CONFIG_PPC_ADV_DEBUG_REGS - /* For processors using DABR (i.e. 970), the bottom 3 bits are flags. - * It was assumed, on previous implementations, that 3 bits were - * passed together with the data address, fitting the design of the - * DABR register, as follows: - * - * bit 0: Read flag - * bit 1: Write flag - * bit 2: Breakpoint translation - * - * Thus, we use them here as so. - */ - - /* Ensure breakpoint translation bit is set */ - if (data && !(data & HW_BRK_TYPE_TRANSLATE)) - return -EIO; - hw_brk.address = data & (~HW_BRK_TYPE_DABR); - hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; - hw_brk.len = DABR_MAX_LEN; - hw_brk.hw_len = DABR_MAX_LEN; - set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR); -#ifdef CONFIG_HAVE_HW_BREAKPOINT - bp = thread->ptrace_bps[0]; - if (!set_bp) { - if (bp) { - unregister_hw_breakpoint(bp); - thread->ptrace_bps[0] = NULL; - } - return 0; - } - if (bp) { - attr = bp->attr; - attr.bp_addr = hw_brk.address; - attr.bp_len = DABR_MAX_LEN; - arch_bp_generic_fields(hw_brk.type, &attr.bp_type); - - /* Enable breakpoint */ - attr.disabled = false; - - ret = modify_user_hw_breakpoint(bp, &attr); - if (ret) { - return ret; - } - thread->ptrace_bps[0] = bp; - thread->hw_brk = hw_brk; - return 0; - } - - /* Create a new breakpoint request if one doesn't exist already */ - hw_breakpoint_init(&attr); - attr.bp_addr = hw_brk.address; - attr.bp_len = DABR_MAX_LEN; - arch_bp_generic_fields(hw_brk.type, - &attr.bp_type); - - thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, - ptrace_triggered, NULL, task); - if (IS_ERR(bp)) { - thread->ptrace_bps[0] = NULL; - return PTR_ERR(bp); - } - -#else /* !CONFIG_HAVE_HW_BREAKPOINT */ - if (set_bp && (!ppc_breakpoint_available())) - return -ENODEV; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ - task->thread.hw_brk = hw_brk; -#else /* CONFIG_PPC_ADV_DEBUG_REGS */ - /* As described above, it was assumed 3 bits were passed with the data - * address, but we will assume only the mode bits will be passed - * as to not cause alignment restrictions for DAC-based processors. - */ - - /* DAC's hold the whole address without any mode flags */ - task->thread.debug.dac1 = data & ~0x3UL; - - if (task->thread.debug.dac1 == 0) { - dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); - if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, - task->thread.debug.dbcr1)) { - task->thread.regs->msr &= ~MSR_DE; - task->thread.debug.dbcr0 &= ~DBCR0_IDM; - } - return 0; - } - - /* Read or Write bits must be set */ - - if (!(data & 0x3UL)) - return -EINVAL; - - /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 - register */ - task->thread.debug.dbcr0 |= DBCR0_IDM; - - /* Check for write and read flags and set DBCR0 - accordingly */ - dbcr_dac(task) &= ~(DBCR_DAC1R|DBCR_DAC1W); - if (data & 0x1UL) - dbcr_dac(task) |= DBCR_DAC1R; - if (data & 0x2UL) - dbcr_dac(task) |= DBCR_DAC1W; - task->thread.regs->msr |= MSR_DE; -#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ - return 0; -} - -/* - * Called by kernel/ptrace.c when detaching.. - * - * Make sure single step bits etc are not set. - */ -void ptrace_disable(struct task_struct *child) -{ - /* make sure the single step bit is not set. */ - user_disable_single_step(child); -} - -#ifdef CONFIG_PPC_ADV_DEBUG_REGS -static long set_instruction_bp(struct task_struct *child, - struct ppc_hw_breakpoint *bp_info) -{ - int slot; - int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0); - int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0); - int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0); - int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0); - - if (dbcr_iac_range(child) & DBCR_IAC12MODE) - slot2_in_use = 1; - if (dbcr_iac_range(child) & DBCR_IAC34MODE) - slot4_in_use = 1; - - if (bp_info->addr >= TASK_SIZE) - return -EIO; - - if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { - - /* Make sure range is valid. */ - if (bp_info->addr2 >= TASK_SIZE) - return -EIO; - - /* We need a pair of IAC regsisters */ - if ((!slot1_in_use) && (!slot2_in_use)) { - slot = 1; - child->thread.debug.iac1 = bp_info->addr; - child->thread.debug.iac2 = bp_info->addr2; - child->thread.debug.dbcr0 |= DBCR0_IAC1; - if (bp_info->addr_mode == - PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) - dbcr_iac_range(child) |= DBCR_IAC12X; - else - dbcr_iac_range(child) |= DBCR_IAC12I; -#if CONFIG_PPC_ADV_DEBUG_IACS > 2 - } else if ((!slot3_in_use) && (!slot4_in_use)) { - slot = 3; - child->thread.debug.iac3 = bp_info->addr; - child->thread.debug.iac4 = bp_info->addr2; - child->thread.debug.dbcr0 |= DBCR0_IAC3; - if (bp_info->addr_mode == - PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) - dbcr_iac_range(child) |= DBCR_IAC34X; - else - dbcr_iac_range(child) |= DBCR_IAC34I; -#endif - } else - return -ENOSPC; - } else { - /* We only need one. If possible leave a pair free in - * case a range is needed later - */ - if (!slot1_in_use) { - /* - * Don't use iac1 if iac1-iac2 are free and either - * iac3 or iac4 (but not both) are free - */ - if (slot2_in_use || (slot3_in_use == slot4_in_use)) { - slot = 1; - child->thread.debug.iac1 = bp_info->addr; - child->thread.debug.dbcr0 |= DBCR0_IAC1; - goto out; - } - } - if (!slot2_in_use) { - slot = 2; - child->thread.debug.iac2 = bp_info->addr; - child->thread.debug.dbcr0 |= DBCR0_IAC2; -#if CONFIG_PPC_ADV_DEBUG_IACS > 2 - } else if (!slot3_in_use) { - slot = 3; - child->thread.debug.iac3 = bp_info->addr; - child->thread.debug.dbcr0 |= DBCR0_IAC3; - } else if (!slot4_in_use) { - slot = 4; - child->thread.debug.iac4 = bp_info->addr; - child->thread.debug.dbcr0 |= DBCR0_IAC4; -#endif - } else - return -ENOSPC; - } -out: - child->thread.debug.dbcr0 |= DBCR0_IDM; - child->thread.regs->msr |= MSR_DE; - - return slot; -} - -static int del_instruction_bp(struct task_struct *child, int slot) -{ - switch (slot) { - case 1: - if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0) - return -ENOENT; - - if (dbcr_iac_range(child) & DBCR_IAC12MODE) { - /* address range - clear slots 1 & 2 */ - child->thread.debug.iac2 = 0; - dbcr_iac_range(child) &= ~DBCR_IAC12MODE; - } - child->thread.debug.iac1 = 0; - child->thread.debug.dbcr0 &= ~DBCR0_IAC1; - break; - case 2: - if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0) - return -ENOENT; - - if (dbcr_iac_range(child) & DBCR_IAC12MODE) - /* used in a range */ - return -EINVAL; - child->thread.debug.iac2 = 0; - child->thread.debug.dbcr0 &= ~DBCR0_IAC2; - break; -#if CONFIG_PPC_ADV_DEBUG_IACS > 2 - case 3: - if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0) - return -ENOENT; - - if (dbcr_iac_range(child) & DBCR_IAC34MODE) { - /* address range - clear slots 3 & 4 */ - child->thread.debug.iac4 = 0; - dbcr_iac_range(child) &= ~DBCR_IAC34MODE; - } - child->thread.debug.iac3 = 0; - child->thread.debug.dbcr0 &= ~DBCR0_IAC3; - break; - case 4: - if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0) - return -ENOENT; - - if (dbcr_iac_range(child) & DBCR_IAC34MODE) - /* Used in a range */ - return -EINVAL; - child->thread.debug.iac4 = 0; - child->thread.debug.dbcr0 &= ~DBCR0_IAC4; - break; -#endif - default: - return -EINVAL; - } - return 0; -} - -static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) -{ - int byte_enable = - (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT) - & 0xf; - int condition_mode = - bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE; - int slot; - - if (byte_enable && (condition_mode == 0)) - return -EINVAL; - - if (bp_info->addr >= TASK_SIZE) - return -EIO; - - if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) { - slot = 1; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - dbcr_dac(child) |= DBCR_DAC1R; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) - dbcr_dac(child) |= DBCR_DAC1W; - child->thread.debug.dac1 = (unsigned long)bp_info->addr; -#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - if (byte_enable) { - child->thread.debug.dvc1 = - (unsigned long)bp_info->condition_value; - child->thread.debug.dbcr2 |= - ((byte_enable << DBCR2_DVC1BE_SHIFT) | - (condition_mode << DBCR2_DVC1M_SHIFT)); - } -#endif -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { - /* Both dac1 and dac2 are part of a range */ - return -ENOSPC; -#endif - } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) { - slot = 2; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - dbcr_dac(child) |= DBCR_DAC2R; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) - dbcr_dac(child) |= DBCR_DAC2W; - child->thread.debug.dac2 = (unsigned long)bp_info->addr; -#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - if (byte_enable) { - child->thread.debug.dvc2 = - (unsigned long)bp_info->condition_value; - child->thread.debug.dbcr2 |= - ((byte_enable << DBCR2_DVC2BE_SHIFT) | - (condition_mode << DBCR2_DVC2M_SHIFT)); - } -#endif - } else - return -ENOSPC; - child->thread.debug.dbcr0 |= DBCR0_IDM; - child->thread.regs->msr |= MSR_DE; - - return slot + 4; -} - -static int del_dac(struct task_struct *child, int slot) -{ - if (slot == 1) { - if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) - return -ENOENT; - - child->thread.debug.dac1 = 0; - dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W); -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { - child->thread.debug.dac2 = 0; - child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE; - } - child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); -#endif -#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - child->thread.debug.dvc1 = 0; -#endif - } else if (slot == 2) { - if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) - return -ENOENT; - -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) - /* Part of a range */ - return -EINVAL; - child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); -#endif -#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - child->thread.debug.dvc2 = 0; -#endif - child->thread.debug.dac2 = 0; - dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W); - } else - return -EINVAL; - - return 0; -} -#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ - -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE -static int set_dac_range(struct task_struct *child, - struct ppc_hw_breakpoint *bp_info) -{ - int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK; - - /* We don't allow range watchpoints to be used with DVC */ - if (bp_info->condition_mode) - return -EINVAL; - - /* - * Best effort to verify the address range. The user/supervisor bits - * prevent trapping in kernel space, but let's fail on an obvious bad - * range. The simple test on the mask is not fool-proof, and any - * exclusive range will spill over into kernel space. - */ - if (bp_info->addr >= TASK_SIZE) - return -EIO; - if (mode == PPC_BREAKPOINT_MODE_MASK) { - /* - * dac2 is a bitmask. Don't allow a mask that makes a - * kernel space address from a valid dac1 value - */ - if (~((unsigned long)bp_info->addr2) >= TASK_SIZE) - return -EIO; - } else { - /* - * For range breakpoints, addr2 must also be a valid address - */ - if (bp_info->addr2 >= TASK_SIZE) - return -EIO; - } - - if (child->thread.debug.dbcr0 & - (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)) - return -ENOSPC; - - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) - child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); - child->thread.debug.dac1 = bp_info->addr; - child->thread.debug.dac2 = bp_info->addr2; - if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) - child->thread.debug.dbcr2 |= DBCR2_DAC12M; - else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) - child->thread.debug.dbcr2 |= DBCR2_DAC12MX; - else /* PPC_BREAKPOINT_MODE_MASK */ - child->thread.debug.dbcr2 |= DBCR2_DAC12MM; - child->thread.regs->msr |= MSR_DE; - - return 5; -} -#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */ - -static long ppc_set_hwdebug(struct task_struct *child, - struct ppc_hw_breakpoint *bp_info) -{ -#ifdef CONFIG_HAVE_HW_BREAKPOINT - int len = 0; - struct thread_struct *thread = &(child->thread); - struct perf_event *bp; - struct perf_event_attr attr; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#ifndef CONFIG_PPC_ADV_DEBUG_REGS - struct arch_hw_breakpoint brk; -#endif - - if (bp_info->version != 1) - return -ENOTSUPP; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - /* - * Check for invalid flags and combinations - */ - if ((bp_info->trigger_type == 0) || - (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE | - PPC_BREAKPOINT_TRIGGER_RW)) || - (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) || - (bp_info->condition_mode & - ~(PPC_BREAKPOINT_CONDITION_MODE | - PPC_BREAKPOINT_CONDITION_BE_ALL))) - return -EINVAL; -#if CONFIG_PPC_ADV_DEBUG_DVCS == 0 - if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) - return -EINVAL; -#endif - - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) { - if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) || - (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)) - return -EINVAL; - return set_instruction_bp(child, bp_info); - } - if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) - return set_dac(child, bp_info); - -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - return set_dac_range(child, bp_info); -#else - return -EINVAL; -#endif -#else /* !CONFIG_PPC_ADV_DEBUG_DVCS */ - /* - * We only support one data breakpoint - */ - if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 || - (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 || - bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) - return -EINVAL; - - if ((unsigned long)bp_info->addr >= TASK_SIZE) - return -EIO; - - brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN; - brk.type = HW_BRK_TYPE_TRANSLATE; - brk.len = DABR_MAX_LEN; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - brk.type |= HW_BRK_TYPE_READ; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) - brk.type |= HW_BRK_TYPE_WRITE; -#ifdef CONFIG_HAVE_HW_BREAKPOINT - if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) - len = bp_info->addr2 - bp_info->addr; - else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) - len = 1; - else - return -EINVAL; - bp = thread->ptrace_bps[0]; - if (bp) - return -ENOSPC; - - /* Create a new breakpoint request if one doesn't exist already */ - hw_breakpoint_init(&attr); - attr.bp_addr = (unsigned long)bp_info->addr; - attr.bp_len = len; - arch_bp_generic_fields(brk.type, &attr.bp_type); - - thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, - ptrace_triggered, NULL, child); - if (IS_ERR(bp)) { - thread->ptrace_bps[0] = NULL; - return PTR_ERR(bp); - } - - return 1; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ - - if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) - return -EINVAL; - - if (child->thread.hw_brk.address) - return -ENOSPC; - - if (!ppc_breakpoint_available()) - return -ENODEV; - - child->thread.hw_brk = brk; - - return 1; -#endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */ -} - -static long ppc_del_hwdebug(struct task_struct *child, long data) -{ -#ifdef CONFIG_HAVE_HW_BREAKPOINT - int ret = 0; - struct thread_struct *thread = &(child->thread); - struct perf_event *bp; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - int rc; - - if (data <= 4) - rc = del_instruction_bp(child, (int)data); - else - rc = del_dac(child, (int)data - 4); - - if (!rc) { - if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0, - child->thread.debug.dbcr1)) { - child->thread.debug.dbcr0 &= ~DBCR0_IDM; - child->thread.regs->msr &= ~MSR_DE; - } - } - return rc; -#else - if (data != 1) - return -EINVAL; - -#ifdef CONFIG_HAVE_HW_BREAKPOINT - bp = thread->ptrace_bps[0]; - if (bp) { - unregister_hw_breakpoint(bp); - thread->ptrace_bps[0] = NULL; - } else - ret = -ENOENT; - return ret; -#else /* CONFIG_HAVE_HW_BREAKPOINT */ - if (child->thread.hw_brk.address == 0) - return -ENOENT; - - child->thread.hw_brk.address = 0; - child->thread.hw_brk.type = 0; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ - - return 0; -#endif -} - -long arch_ptrace(struct task_struct *child, long request, - unsigned long addr, unsigned long data) -{ - int ret = -EPERM; - void __user *datavp = (void __user *) data; - unsigned long __user *datalp = datavp; - - switch (request) { - /* read the word at location addr in the USER area. */ - case PTRACE_PEEKUSR: { - unsigned long index, tmp; - - ret = -EIO; - /* convert to index and check */ -#ifdef CONFIG_PPC32 - index = addr >> 2; - if ((addr & 3) || (index > PT_FPSCR) - || (child->thread.regs == NULL)) -#else - index = addr >> 3; - if ((addr & 7) || (index > PT_FPSCR)) -#endif - break; - - CHECK_FULL_REGS(child->thread.regs); - if (index < PT_FPR0) { - ret = ptrace_get_reg(child, (int) index, &tmp); - if (ret) - break; - } else { - unsigned int fpidx = index - PT_FPR0; - - flush_fp_to_thread(child); - if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&tmp, &child->thread.TS_FPR(fpidx), - sizeof(long)); - else - tmp = child->thread.fp_state.fpscr; - } - ret = put_user(tmp, datalp); - break; - } - - /* write the word at location addr in the USER area */ - case PTRACE_POKEUSR: { - unsigned long index; - - ret = -EIO; - /* convert to index and check */ -#ifdef CONFIG_PPC32 - index = addr >> 2; - if ((addr & 3) || (index > PT_FPSCR) - || (child->thread.regs == NULL)) -#else - index = addr >> 3; - if ((addr & 7) || (index > PT_FPSCR)) -#endif - break; - - CHECK_FULL_REGS(child->thread.regs); - if (index < PT_FPR0) { - ret = ptrace_put_reg(child, index, data); - } else { - unsigned int fpidx = index - PT_FPR0; - - flush_fp_to_thread(child); - if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&child->thread.TS_FPR(fpidx), &data, - sizeof(long)); - else - child->thread.fp_state.fpscr = data; - ret = 0; - } - break; - } - - case PPC_PTRACE_GETHWDBGINFO: { - struct ppc_debug_info dbginfo; - - dbginfo.version = 1; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - dbginfo.num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS; - dbginfo.num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS; - dbginfo.num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS; - dbginfo.data_bp_alignment = 4; - dbginfo.sizeof_condition = 4; - dbginfo.features = PPC_DEBUG_FEATURE_INSN_BP_RANGE | - PPC_DEBUG_FEATURE_INSN_BP_MASK; -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - dbginfo.features |= - PPC_DEBUG_FEATURE_DATA_BP_RANGE | - PPC_DEBUG_FEATURE_DATA_BP_MASK; -#endif -#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ - dbginfo.num_instruction_bps = 0; - if (ppc_breakpoint_available()) - dbginfo.num_data_bps = 1; - else - dbginfo.num_data_bps = 0; - dbginfo.num_condition_regs = 0; -#ifdef CONFIG_PPC64 - dbginfo.data_bp_alignment = 8; -#else - dbginfo.data_bp_alignment = 4; -#endif - dbginfo.sizeof_condition = 0; -#ifdef CONFIG_HAVE_HW_BREAKPOINT - dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE; - if (dawr_enabled()) - dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR; -#else - dbginfo.features = 0; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ - - if (copy_to_user(datavp, &dbginfo, - sizeof(struct ppc_debug_info))) - return -EFAULT; - return 0; - } - - case PPC_PTRACE_SETHWDEBUG: { - struct ppc_hw_breakpoint bp_info; - - if (copy_from_user(&bp_info, datavp, - sizeof(struct ppc_hw_breakpoint))) - return -EFAULT; - return ppc_set_hwdebug(child, &bp_info); - } - - case PPC_PTRACE_DELHWDEBUG: { - ret = ppc_del_hwdebug(child, data); - break; - } - - case PTRACE_GET_DEBUGREG: { -#ifndef CONFIG_PPC_ADV_DEBUG_REGS - unsigned long dabr_fake; -#endif - ret = -EINVAL; - /* We only support one DABR and no IABRS at the moment */ - if (addr > 0) - break; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - ret = put_user(child->thread.debug.dac1, datalp); -#else - dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | - (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); - ret = put_user(dabr_fake, datalp); -#endif - break; - } - - case PTRACE_SET_DEBUGREG: - ret = ptrace_set_debugreg(child, addr, data); - break; - -#ifdef CONFIG_PPC64 - case PTRACE_GETREGS64: -#endif - case PTRACE_GETREGS: /* Get all pt_regs from the child. */ - return copy_regset_to_user(child, &user_ppc_native_view, - REGSET_GPR, - 0, sizeof(struct user_pt_regs), - datavp); - -#ifdef CONFIG_PPC64 - case PTRACE_SETREGS64: -#endif - case PTRACE_SETREGS: /* Set all gp regs in the child. */ - return copy_regset_from_user(child, &user_ppc_native_view, - REGSET_GPR, - 0, sizeof(struct user_pt_regs), - datavp); - - case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */ - return copy_regset_to_user(child, &user_ppc_native_view, - REGSET_FPR, - 0, sizeof(elf_fpregset_t), - datavp); - - case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */ - return copy_regset_from_user(child, &user_ppc_native_view, - REGSET_FPR, - 0, sizeof(elf_fpregset_t), - datavp); - -#ifdef CONFIG_ALTIVEC - case PTRACE_GETVRREGS: - return copy_regset_to_user(child, &user_ppc_native_view, - REGSET_VMX, - 0, (33 * sizeof(vector128) + - sizeof(u32)), - datavp); - - case PTRACE_SETVRREGS: - return copy_regset_from_user(child, &user_ppc_native_view, - REGSET_VMX, - 0, (33 * sizeof(vector128) + - sizeof(u32)), - datavp); -#endif -#ifdef CONFIG_VSX - case PTRACE_GETVSRREGS: - return copy_regset_to_user(child, &user_ppc_native_view, - REGSET_VSX, - 0, 32 * sizeof(double), - datavp); - - case PTRACE_SETVSRREGS: - return copy_regset_from_user(child, &user_ppc_native_view, - REGSET_VSX, - 0, 32 * sizeof(double), - datavp); -#endif -#ifdef CONFIG_SPE - case PTRACE_GETEVRREGS: - /* Get the child spe register state. */ - return copy_regset_to_user(child, &user_ppc_native_view, - REGSET_SPE, 0, 35 * sizeof(u32), - datavp); - - case PTRACE_SETEVRREGS: - /* Set the child spe register state. */ - return copy_regset_from_user(child, &user_ppc_native_view, - REGSET_SPE, 0, 35 * sizeof(u32), - datavp); -#endif - - default: - ret = ptrace_request(child, request, addr, data); - break; - } - return ret; -} - -#ifdef CONFIG_SECCOMP -static int do_seccomp(struct pt_regs *regs) -{ - if (!test_thread_flag(TIF_SECCOMP)) - return 0; - - /* - * The ABI we present to seccomp tracers is that r3 contains - * the syscall return value and orig_gpr3 contains the first - * syscall parameter. This is different to the ptrace ABI where - * both r3 and orig_gpr3 contain the first syscall parameter. - */ - regs->gpr[3] = -ENOSYS; - - /* - * We use the __ version here because we have already checked - * TIF_SECCOMP. If this fails, there is nothing left to do, we - * have already loaded -ENOSYS into r3, or seccomp has put - * something else in r3 (via SECCOMP_RET_ERRNO/TRACE). - */ - if (__secure_computing(NULL)) - return -1; - - /* - * The syscall was allowed by seccomp, restore the register - * state to what audit expects. - * Note that we use orig_gpr3, which means a seccomp tracer can - * modify the first syscall parameter (in orig_gpr3) and also - * allow the syscall to proceed. - */ - regs->gpr[3] = regs->orig_gpr3; - - return 0; -} -#else -static inline int do_seccomp(struct pt_regs *regs) { return 0; } -#endif /* CONFIG_SECCOMP */ - -/** - * do_syscall_trace_enter() - Do syscall tracing on kernel entry. - * @regs: the pt_regs of the task to trace (current) - * - * Performs various types of tracing on syscall entry. This includes seccomp, - * ptrace, syscall tracepoints and audit. - * - * The pt_regs are potentially visible to userspace via ptrace, so their - * contents is ABI. - * - * One or more of the tracers may modify the contents of pt_regs, in particular - * to modify arguments or even the syscall number itself. - * - * It's also possible that a tracer can choose to reject the system call. In - * that case this function will return an illegal syscall number, and will put - * an appropriate return value in regs->r3. - * - * Return: the (possibly changed) syscall number. - */ -long do_syscall_trace_enter(struct pt_regs *regs) -{ - u32 flags; - - user_exit(); - - flags = READ_ONCE(current_thread_info()->flags) & - (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); - - if (flags) { - int rc = tracehook_report_syscall_entry(regs); - - if (unlikely(flags & _TIF_SYSCALL_EMU)) { - /* - * A nonzero return code from - * tracehook_report_syscall_entry() tells us to prevent - * the syscall execution, but we are not going to - * execute it anyway. - * - * Returning -1 will skip the syscall execution. We want - * to avoid clobbering any registers, so we don't goto - * the skip label below. - */ - return -1; - } - - if (rc) { - /* - * The tracer decided to abort the syscall. Note that - * the tracer may also just change regs->gpr[0] to an - * invalid syscall number, that is handled below on the - * exit path. - */ - goto skip; - } - } - - /* Run seccomp after ptrace; allow it to set gpr[3]. */ - if (do_seccomp(regs)) - return -1; - - /* Avoid trace and audit when syscall is invalid. */ - if (regs->gpr[0] >= NR_syscalls) - goto skip; - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->gpr[0]); - -#ifdef CONFIG_PPC64 - if (!is_32bit_task()) - audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4], - regs->gpr[5], regs->gpr[6]); - else -#endif - audit_syscall_entry(regs->gpr[0], - regs->gpr[3] & 0xffffffff, - regs->gpr[4] & 0xffffffff, - regs->gpr[5] & 0xffffffff, - regs->gpr[6] & 0xffffffff); - - /* Return the possibly modified but valid syscall number */ - return regs->gpr[0]; - -skip: - /* - * If we are aborting explicitly, or if the syscall number is - * now invalid, set the return value to -ENOSYS. - */ - regs->gpr[3] = -ENOSYS; - return -1; -} - -void do_syscall_trace_leave(struct pt_regs *regs) -{ - int step; - - audit_syscall_exit(regs); - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->result); - - step = test_thread_flag(TIF_SINGLESTEP); - if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, step); - - user_enter(); -} - -void __init pt_regs_check(void); - -/* - * Dummy function, its purpose is to break the build if struct pt_regs and - * struct user_pt_regs don't match. - */ -void __init pt_regs_check(void) -{ - BUILD_BUG_ON(offsetof(struct pt_regs, gpr) != - offsetof(struct user_pt_regs, gpr)); - BUILD_BUG_ON(offsetof(struct pt_regs, nip) != - offsetof(struct user_pt_regs, nip)); - BUILD_BUG_ON(offsetof(struct pt_regs, msr) != - offsetof(struct user_pt_regs, msr)); - BUILD_BUG_ON(offsetof(struct pt_regs, msr) != - offsetof(struct user_pt_regs, msr)); - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct user_pt_regs, orig_gpr3)); - BUILD_BUG_ON(offsetof(struct pt_regs, ctr) != - offsetof(struct user_pt_regs, ctr)); - BUILD_BUG_ON(offsetof(struct pt_regs, link) != - offsetof(struct user_pt_regs, link)); - BUILD_BUG_ON(offsetof(struct pt_regs, xer) != - offsetof(struct user_pt_regs, xer)); - BUILD_BUG_ON(offsetof(struct pt_regs, ccr) != - offsetof(struct user_pt_regs, ccr)); -#ifdef __powerpc64__ - BUILD_BUG_ON(offsetof(struct pt_regs, softe) != - offsetof(struct user_pt_regs, softe)); -#else - BUILD_BUG_ON(offsetof(struct pt_regs, mq) != - offsetof(struct user_pt_regs, mq)); -#endif - BUILD_BUG_ON(offsetof(struct pt_regs, trap) != - offsetof(struct user_pt_regs, trap)); - BUILD_BUG_ON(offsetof(struct pt_regs, dar) != - offsetof(struct user_pt_regs, dar)); - BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) != - offsetof(struct user_pt_regs, dsisr)); - BUILD_BUG_ON(offsetof(struct pt_regs, result) != - offsetof(struct user_pt_regs, result)); - - BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs)); - - // Now check that the pt_regs offsets match the uapi #defines - #define CHECK_REG(_pt, _reg) \ - BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \ - sizeof(unsigned long))); - - CHECK_REG(PT_R0, gpr[0]); - CHECK_REG(PT_R1, gpr[1]); - CHECK_REG(PT_R2, gpr[2]); - CHECK_REG(PT_R3, gpr[3]); - CHECK_REG(PT_R4, gpr[4]); - CHECK_REG(PT_R5, gpr[5]); - CHECK_REG(PT_R6, gpr[6]); - CHECK_REG(PT_R7, gpr[7]); - CHECK_REG(PT_R8, gpr[8]); - CHECK_REG(PT_R9, gpr[9]); - CHECK_REG(PT_R10, gpr[10]); - CHECK_REG(PT_R11, gpr[11]); - CHECK_REG(PT_R12, gpr[12]); - CHECK_REG(PT_R13, gpr[13]); - CHECK_REG(PT_R14, gpr[14]); - CHECK_REG(PT_R15, gpr[15]); - CHECK_REG(PT_R16, gpr[16]); - CHECK_REG(PT_R17, gpr[17]); - CHECK_REG(PT_R18, gpr[18]); - CHECK_REG(PT_R19, gpr[19]); - CHECK_REG(PT_R20, gpr[20]); - CHECK_REG(PT_R21, gpr[21]); - CHECK_REG(PT_R22, gpr[22]); - CHECK_REG(PT_R23, gpr[23]); - CHECK_REG(PT_R24, gpr[24]); - CHECK_REG(PT_R25, gpr[25]); - CHECK_REG(PT_R26, gpr[26]); - CHECK_REG(PT_R27, gpr[27]); - CHECK_REG(PT_R28, gpr[28]); - CHECK_REG(PT_R29, gpr[29]); - CHECK_REG(PT_R30, gpr[30]); - CHECK_REG(PT_R31, gpr[31]); - CHECK_REG(PT_NIP, nip); - CHECK_REG(PT_MSR, msr); - CHECK_REG(PT_ORIG_R3, orig_gpr3); - CHECK_REG(PT_CTR, ctr); - CHECK_REG(PT_LNK, link); - CHECK_REG(PT_XER, xer); - CHECK_REG(PT_CCR, ccr); -#ifdef CONFIG_PPC64 - CHECK_REG(PT_SOFTE, softe); -#else - CHECK_REG(PT_MQ, mq); -#endif - CHECK_REG(PT_TRAP, trap); - CHECK_REG(PT_DAR, dar); - CHECK_REG(PT_DSISR, dsisr); - CHECK_REG(PT_RESULT, result); - #undef CHECK_REG - - BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long)); - - /* - * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the - * real registers. - */ - BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long)); -} diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile new file mode 100644 index 00000000000000..e9d97c2d063e5f --- /dev/null +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the linux kernel. +# + +CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' + +obj-y += ptrace.o ptrace-view.o +obj-$(CONFIG_PPC64) += ptrace32.o +obj-$(CONFIG_VSX) += ptrace-vsx.o +ifneq ($(CONFIG_VSX),y) +obj-y += ptrace-novsx.o +endif +obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o +obj-$(CONFIG_SPE) += ptrace-spe.o +obj-$(CONFIG_PPC_TRANSACTIONAL_MEM) += ptrace-tm.o +obj-$(CONFIG_PPC_ADV_DEBUG_REGS) += ptrace-adv.o +ifneq ($(CONFIG_PPC_ADV_DEBUG_REGS),y) +obj-y += ptrace-noadv.o +endif diff --git a/arch/powerpc/kernel/ptrace/ptrace-adv.c b/arch/powerpc/kernel/ptrace/ptrace-adv.c new file mode 100644 index 00000000000000..3990c01ef8cfa9 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-adv.c @@ -0,0 +1,492 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +#include "ptrace-decl.h" + +void user_enable_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { + task->thread.debug.dbcr0 &= ~DBCR0_BT; + task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; + regs->msr |= MSR_DE; + } + set_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +void user_enable_block_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { + task->thread.debug.dbcr0 &= ~DBCR0_IC; + task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT; + regs->msr |= MSR_DE; + } + set_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +void user_disable_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { + /* + * The logic to disable single stepping should be as + * simple as turning off the Instruction Complete flag. + * And, after doing so, if all debug flags are off, turn + * off DBCR0(IDM) and MSR(DE) .... Torez + */ + task->thread.debug.dbcr0 &= ~(DBCR0_IC | DBCR0_BT); + /* + * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set. + */ + if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, + task->thread.debug.dbcr1)) { + /* + * All debug events were off..... + */ + task->thread.debug.dbcr0 &= ~DBCR0_IDM; + regs->msr &= ~MSR_DE; + } + } + clear_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +void ppc_gethwdinfo(struct ppc_debug_info *dbginfo) +{ + dbginfo->version = 1; + dbginfo->num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS; + dbginfo->num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS; + dbginfo->num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS; + dbginfo->data_bp_alignment = 4; + dbginfo->sizeof_condition = 4; + dbginfo->features = PPC_DEBUG_FEATURE_INSN_BP_RANGE | + PPC_DEBUG_FEATURE_INSN_BP_MASK; + if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_DAC_RANGE)) + dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_RANGE | + PPC_DEBUG_FEATURE_DATA_BP_MASK; +} + +int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, + unsigned long __user *datalp) +{ + /* We only support one DABR and no IABRS at the moment */ + if (addr > 0) + return -EINVAL; + return put_user(child->thread.debug.dac1, datalp); +} + +int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) +{ +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int ret; + struct thread_struct *thread = &task->thread; + struct perf_event *bp; + struct perf_event_attr attr; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + + /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). + * For embedded processors we support one DAC and no IAC's at the + * moment. + */ + if (addr > 0) + return -EINVAL; + + /* The bottom 3 bits in dabr are flags */ + if ((data & ~0x7UL) >= TASK_SIZE) + return -EIO; + + /* As described above, it was assumed 3 bits were passed with the data + * address, but we will assume only the mode bits will be passed + * as to not cause alignment restrictions for DAC-based processors. + */ + + /* DAC's hold the whole address without any mode flags */ + task->thread.debug.dac1 = data & ~0x3UL; + + if (task->thread.debug.dac1 == 0) { + dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); + if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, + task->thread.debug.dbcr1)) { + task->thread.regs->msr &= ~MSR_DE; + task->thread.debug.dbcr0 &= ~DBCR0_IDM; + } + return 0; + } + + /* Read or Write bits must be set */ + + if (!(data & 0x3UL)) + return -EINVAL; + + /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 register */ + task->thread.debug.dbcr0 |= DBCR0_IDM; + + /* Check for write and read flags and set DBCR0 accordingly */ + dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); + if (data & 0x1UL) + dbcr_dac(task) |= DBCR_DAC1R; + if (data & 0x2UL) + dbcr_dac(task) |= DBCR_DAC1W; + task->thread.regs->msr |= MSR_DE; + return 0; +} + +static long set_instruction_bp(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + int slot; + int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0); + int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0); + int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0); + int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0); + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) + slot2_in_use = 1; + if (dbcr_iac_range(child) & DBCR_IAC34MODE) + slot4_in_use = 1; + + if (bp_info->addr >= TASK_SIZE) + return -EIO; + + if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { + /* Make sure range is valid. */ + if (bp_info->addr2 >= TASK_SIZE) + return -EIO; + + /* We need a pair of IAC regsisters */ + if (!slot1_in_use && !slot2_in_use) { + slot = 1; + child->thread.debug.iac1 = bp_info->addr; + child->thread.debug.iac2 = bp_info->addr2; + child->thread.debug.dbcr0 |= DBCR0_IAC1; + if (bp_info->addr_mode == + PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + dbcr_iac_range(child) |= DBCR_IAC12X; + else + dbcr_iac_range(child) |= DBCR_IAC12I; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + } else if ((!slot3_in_use) && (!slot4_in_use)) { + slot = 3; + child->thread.debug.iac3 = bp_info->addr; + child->thread.debug.iac4 = bp_info->addr2; + child->thread.debug.dbcr0 |= DBCR0_IAC3; + if (bp_info->addr_mode == + PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + dbcr_iac_range(child) |= DBCR_IAC34X; + else + dbcr_iac_range(child) |= DBCR_IAC34I; +#endif + } else { + return -ENOSPC; + } + } else { + /* We only need one. If possible leave a pair free in + * case a range is needed later + */ + if (!slot1_in_use) { + /* + * Don't use iac1 if iac1-iac2 are free and either + * iac3 or iac4 (but not both) are free + */ + if (slot2_in_use || slot3_in_use == slot4_in_use) { + slot = 1; + child->thread.debug.iac1 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC1; + goto out; + } + } + if (!slot2_in_use) { + slot = 2; + child->thread.debug.iac2 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC2; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + } else if (!slot3_in_use) { + slot = 3; + child->thread.debug.iac3 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC3; + } else if (!slot4_in_use) { + slot = 4; + child->thread.debug.iac4 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC4; +#endif + } else { + return -ENOSPC; + } + } +out: + child->thread.debug.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot; +} + +static int del_instruction_bp(struct task_struct *child, int slot) +{ + switch (slot) { + case 1: + if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) { + /* address range - clear slots 1 & 2 */ + child->thread.debug.iac2 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC12MODE; + } + child->thread.debug.iac1 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC1; + break; + case 2: + if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) + /* used in a range */ + return -EINVAL; + child->thread.debug.iac2 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC2; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case 3: + if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) { + /* address range - clear slots 3 & 4 */ + child->thread.debug.iac4 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC34MODE; + } + child->thread.debug.iac3 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC3; + break; + case 4: + if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) + /* Used in a range */ + return -EINVAL; + child->thread.debug.iac4 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC4; + break; +#endif + default: + return -EINVAL; + } + return 0; +} + +static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) +{ + int byte_enable = + (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT) + & 0xf; + int condition_mode = + bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE; + int slot; + + if (byte_enable && condition_mode == 0) + return -EINVAL; + + if (bp_info->addr >= TASK_SIZE) + return -EIO; + + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) { + slot = 1; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC1R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC1W; + child->thread.debug.dac1 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.debug.dvc1 = + (unsigned long)bp_info->condition_value; + child->thread.debug.dbcr2 |= + ((byte_enable << DBCR2_DVC1BE_SHIFT) | + (condition_mode << DBCR2_DVC1M_SHIFT)); + } +#endif +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { + /* Both dac1 and dac2 are part of a range */ + return -ENOSPC; +#endif + } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) { + slot = 2; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC2R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC2W; + child->thread.debug.dac2 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.debug.dvc2 = + (unsigned long)bp_info->condition_value; + child->thread.debug.dbcr2 |= + ((byte_enable << DBCR2_DVC2BE_SHIFT) | + (condition_mode << DBCR2_DVC2M_SHIFT)); + } +#endif + } else { + return -ENOSPC; + } + child->thread.debug.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot + 4; +} + +static int del_dac(struct task_struct *child, int slot) +{ + if (slot == 1) { + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) + return -ENOENT; + + child->thread.debug.dac1 = 0; + dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W); +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { + child->thread.debug.dac2 = 0; + child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE; + } + child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.debug.dvc1 = 0; +#endif + } else if (slot == 2) { + if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) + return -ENOENT; + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) + /* Part of a range */ + return -EINVAL; + child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.debug.dvc2 = 0; +#endif + child->thread.debug.dac2 = 0; + dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W); + } else { + return -EINVAL; + } + + return 0; +} + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE +static int set_dac_range(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK; + + /* We don't allow range watchpoints to be used with DVC */ + if (bp_info->condition_mode) + return -EINVAL; + + /* + * Best effort to verify the address range. The user/supervisor bits + * prevent trapping in kernel space, but let's fail on an obvious bad + * range. The simple test on the mask is not fool-proof, and any + * exclusive range will spill over into kernel space. + */ + if (bp_info->addr >= TASK_SIZE) + return -EIO; + if (mode == PPC_BREAKPOINT_MODE_MASK) { + /* + * dac2 is a bitmask. Don't allow a mask that makes a + * kernel space address from a valid dac1 value + */ + if (~((unsigned long)bp_info->addr2) >= TASK_SIZE) + return -EIO; + } else { + /* + * For range breakpoints, addr2 must also be a valid address + */ + if (bp_info->addr2 >= TASK_SIZE) + return -EIO; + } + + if (child->thread.debug.dbcr0 & + (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)) + return -ENOSPC; + + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); + child->thread.debug.dac1 = bp_info->addr; + child->thread.debug.dac2 = bp_info->addr2; + if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) + child->thread.debug.dbcr2 |= DBCR2_DAC12M; + else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + child->thread.debug.dbcr2 |= DBCR2_DAC12MX; + else /* PPC_BREAKPOINT_MODE_MASK */ + child->thread.debug.dbcr2 |= DBCR2_DAC12MM; + child->thread.regs->msr |= MSR_DE; + + return 5; +} +#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */ + +long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) +{ + if (bp_info->version != 1) + return -ENOTSUPP; + /* + * Check for invalid flags and combinations + */ + if (bp_info->trigger_type == 0 || + (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE | + PPC_BREAKPOINT_TRIGGER_RW)) || + (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) || + (bp_info->condition_mode & + ~(PPC_BREAKPOINT_CONDITION_MODE | + PPC_BREAKPOINT_CONDITION_BE_ALL))) + return -EINVAL; +#if CONFIG_PPC_ADV_DEBUG_DVCS == 0 + if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) + return -EINVAL; +#endif + + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) { + if (bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE || + bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) + return -EINVAL; + return set_instruction_bp(child, bp_info); + } + if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + return set_dac(child, bp_info); + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + return set_dac_range(child, bp_info); +#else + return -EINVAL; +#endif +} + +long ppc_del_hwdebug(struct task_struct *child, long data) +{ + int rc; + + if (data <= 4) + rc = del_instruction_bp(child, (int)data); + else + rc = del_dac(child, (int)data - 4); + + if (!rc) { + if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0, + child->thread.debug.dbcr1)) { + child->thread.debug.dbcr0 &= ~DBCR0_IDM; + child->thread.regs->msr &= ~MSR_DE; + } + } + return rc; +} diff --git a/arch/powerpc/kernel/ptrace/ptrace-altivec.c b/arch/powerpc/kernel/ptrace/ptrace-altivec.c new file mode 100644 index 00000000000000..dd8b75dfbd0687 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-altivec.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +#include + +#include "ptrace-decl.h" + +/* + * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. + * The transfer totals 34 quadword. Quadwords 0-31 contain the + * corresponding vector registers. Quadword 32 contains the vscr as the + * last word (offset 12) within that quadword. Quadword 33 contains the + * vrsave as the first word (offset 0) within the quadword. + * + * This definition of the VMX state is compatible with the current PPC32 + * ptrace interface. This allows signal handling and ptrace to use the + * same structures. This also simplifies the implementation of a bi-arch + * (combined (32- and 64-bit) gdb. + */ + +int vr_active(struct task_struct *target, const struct user_regset *regset) +{ + flush_altivec_to_thread(target); + return target->thread.used_vr ? regset->n : 0; +} + +/* + * Regardless of transactions, 'vr_state' holds the current running + * value of all the VMX registers and 'ckvr_state' holds the last + * checkpointed value of all the VMX registers for the current + * transaction to fall back on in case it aborts. + * + * Userspace interface buffer layout: + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + * }; + */ +int vr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + flush_altivec_to_thread(target); + + BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != + offsetof(struct thread_vr_state, vr[32])); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.vr_state, 0, + 33 * sizeof(vector128)); + if (!ret) { + /* + * Copy out only the low-order word of vrsave. + */ + int start, end; + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + + vrsave.word = target->thread.vrsave; + + start = 33 * sizeof(vector128); + end = start + sizeof(vrsave); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, + start, end); + } + + return ret; +} + +/* + * Regardless of transactions, 'vr_state' holds the current running + * value of all the VMX registers and 'ckvr_state' holds the last + * checkpointed value of all the VMX registers for the current + * transaction to fall back on in case it aborts. + * + * Userspace interface buffer layout: + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + * }; + */ +int vr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + flush_altivec_to_thread(target); + + BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != + offsetof(struct thread_vr_state, vr[32])); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.vr_state, 0, + 33 * sizeof(vector128)); + if (!ret && count > 0) { + /* + * We use only the first word of vrsave. + */ + int start, end; + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + + vrsave.word = target->thread.vrsave; + + start = 33 * sizeof(vector128); + end = start + sizeof(vrsave); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, + start, end); + if (!ret) + target->thread.vrsave = vrsave.word; + } + + return ret; +} diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h new file mode 100644 index 00000000000000..3c8a8199929252 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* + * Set of msr bits that gdb can change on behalf of a process. + */ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +#define MSR_DEBUGCHANGE 0 +#else +#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE) +#endif + +/* + * Max register writeable via put_reg + */ +#ifdef CONFIG_PPC32 +#define PT_MAX_PUT_REG PT_MQ +#else +#define PT_MAX_PUT_REG PT_CCR +#endif + +#define TVSO(f) (offsetof(struct thread_vr_state, f)) +#define TFSO(f) (offsetof(struct thread_fp_state, f)) +#define TSO(f) (offsetof(struct thread_struct, f)) + +/* + * These are our native regset flavors. + */ +enum powerpc_regset { + REGSET_GPR, + REGSET_FPR, +#ifdef CONFIG_ALTIVEC + REGSET_VMX, +#endif +#ifdef CONFIG_VSX + REGSET_VSX, +#endif +#ifdef CONFIG_SPE + REGSET_SPE, +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + REGSET_TM_CGPR, /* TM checkpointed GPR registers */ + REGSET_TM_CFPR, /* TM checkpointed FPR registers */ + REGSET_TM_CVMX, /* TM checkpointed VMX registers */ + REGSET_TM_CVSX, /* TM checkpointed VSX registers */ + REGSET_TM_SPR, /* TM specific SPR registers */ + REGSET_TM_CTAR, /* TM checkpointed TAR register */ + REGSET_TM_CPPR, /* TM checkpointed PPR register */ + REGSET_TM_CDSCR, /* TM checkpointed DSCR register */ +#endif +#ifdef CONFIG_PPC64 + REGSET_PPR, /* PPR register */ + REGSET_DSCR, /* DSCR register */ +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + REGSET_TAR, /* TAR register */ + REGSET_EBB, /* EBB registers */ + REGSET_PMR, /* Performance Monitor Registers */ +#endif +#ifdef CONFIG_PPC_MEM_KEYS + REGSET_PKEY, /* AMR register */ +#endif +}; + +/* ptrace-(no)vsx */ + +int fpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int fpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +/* ptrace-vsx */ + +int vsr_active(struct task_struct *target, const struct user_regset *regset); +int vsr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int vsr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +/* ptrace-altivec */ + +int vr_active(struct task_struct *target, const struct user_regset *regset); +int vr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int vr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +/* ptrace-spe */ + +int evr_active(struct task_struct *target, const struct user_regset *regset); +int evr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int evr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +/* ptrace */ + +int gpr32_get_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf, + unsigned long *regs); +int gpr32_set_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + unsigned long *regs); + +/* ptrace-tm */ + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +void flush_tmregs_to_thread(struct task_struct *tsk); +#else +static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } +#endif + +int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset); +int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset); +int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset); +int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset); +int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_spr_active(struct task_struct *target, const struct user_regset *regset); +int tm_spr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_spr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_tar_active(struct task_struct *target, const struct user_regset *regset); +int tm_tar_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_tar_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_ppr_active(struct task_struct *target, const struct user_regset *regset); +int tm_ppr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_ppr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_dscr_active(struct task_struct *target, const struct user_regset *regset); +int tm_dscr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_dscr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +/* ptrace-view */ + +extern const struct user_regset_view user_ppc_native_view; + +/* ptrace-(no)adv */ +void ppc_gethwdinfo(struct ppc_debug_info *dbginfo); +int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, + unsigned long __user *datalp); +int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data); +long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info); +long ppc_del_hwdebug(struct task_struct *child, long data); diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c new file mode 100644 index 00000000000000..f87e7c5c3bf3af --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +#include + +#include "ptrace-decl.h" + +void user_enable_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { + regs->msr &= ~MSR_BE; + regs->msr |= MSR_SE; + } + set_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +void user_enable_block_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { + regs->msr &= ~MSR_SE; + regs->msr |= MSR_BE; + } + set_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +void user_disable_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) + regs->msr &= ~(MSR_SE | MSR_BE); + + clear_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +void ppc_gethwdinfo(struct ppc_debug_info *dbginfo) +{ + dbginfo->version = 1; + dbginfo->num_instruction_bps = 0; + if (ppc_breakpoint_available()) + dbginfo->num_data_bps = 1; + else + dbginfo->num_data_bps = 0; + dbginfo->num_condition_regs = 0; + dbginfo->data_bp_alignment = sizeof(long); + dbginfo->sizeof_condition = 0; + if (IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT)) { + dbginfo->features = PPC_DEBUG_FEATURE_DATA_BP_RANGE; + if (dawr_enabled()) + dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR; + } else { + dbginfo->features = 0; + } +} + +int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, + unsigned long __user *datalp) +{ + unsigned long dabr_fake; + + /* We only support one DABR and no IABRS at the moment */ + if (addr > 0) + return -EINVAL; + dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | + (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); + return put_user(dabr_fake, datalp); +} + +int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) +{ +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int ret; + struct thread_struct *thread = &task->thread; + struct perf_event *bp; + struct perf_event_attr attr; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + bool set_bp = true; + struct arch_hw_breakpoint hw_brk; + + /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). + * For embedded processors we support one DAC and no IAC's at the + * moment. + */ + if (addr > 0) + return -EINVAL; + + /* The bottom 3 bits in dabr are flags */ + if ((data & ~0x7UL) >= TASK_SIZE) + return -EIO; + + /* For processors using DABR (i.e. 970), the bottom 3 bits are flags. + * It was assumed, on previous implementations, that 3 bits were + * passed together with the data address, fitting the design of the + * DABR register, as follows: + * + * bit 0: Read flag + * bit 1: Write flag + * bit 2: Breakpoint translation + * + * Thus, we use them here as so. + */ + + /* Ensure breakpoint translation bit is set */ + if (data && !(data & HW_BRK_TYPE_TRANSLATE)) + return -EIO; + hw_brk.address = data & (~HW_BRK_TYPE_DABR); + hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; + hw_brk.len = DABR_MAX_LEN; + hw_brk.hw_len = DABR_MAX_LEN; + set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR); +#ifdef CONFIG_HAVE_HW_BREAKPOINT + bp = thread->ptrace_bps[0]; + if (!set_bp) { + if (bp) { + unregister_hw_breakpoint(bp); + thread->ptrace_bps[0] = NULL; + } + return 0; + } + if (bp) { + attr = bp->attr; + attr.bp_addr = hw_brk.address; + attr.bp_len = DABR_MAX_LEN; + arch_bp_generic_fields(hw_brk.type, &attr.bp_type); + + /* Enable breakpoint */ + attr.disabled = false; + + ret = modify_user_hw_breakpoint(bp, &attr); + if (ret) + return ret; + + thread->ptrace_bps[0] = bp; + thread->hw_brk = hw_brk; + return 0; + } + + /* Create a new breakpoint request if one doesn't exist already */ + hw_breakpoint_init(&attr); + attr.bp_addr = hw_brk.address; + attr.bp_len = DABR_MAX_LEN; + arch_bp_generic_fields(hw_brk.type, + &attr.bp_type); + + thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, + ptrace_triggered, NULL, task); + if (IS_ERR(bp)) { + thread->ptrace_bps[0] = NULL; + return PTR_ERR(bp); + } + +#else /* !CONFIG_HAVE_HW_BREAKPOINT */ + if (set_bp && (!ppc_breakpoint_available())) + return -ENODEV; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + task->thread.hw_brk = hw_brk; + return 0; +} + +long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) +{ +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int len = 0; + struct thread_struct *thread = &child->thread; + struct perf_event *bp; + struct perf_event_attr attr; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + struct arch_hw_breakpoint brk; + + if (bp_info->version != 1) + return -ENOTSUPP; + /* + * We only support one data breakpoint + */ + if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 || + (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 || + bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) + return -EINVAL; + + if ((unsigned long)bp_info->addr >= TASK_SIZE) + return -EIO; + + brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN; + brk.type = HW_BRK_TYPE_TRANSLATE; + brk.len = DABR_MAX_LEN; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + brk.type |= HW_BRK_TYPE_READ; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + brk.type |= HW_BRK_TYPE_WRITE; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) + len = bp_info->addr2 - bp_info->addr; + else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + len = 1; + else + return -EINVAL; + bp = thread->ptrace_bps[0]; + if (bp) + return -ENOSPC; + + /* Create a new breakpoint request if one doesn't exist already */ + hw_breakpoint_init(&attr); + attr.bp_addr = (unsigned long)bp_info->addr; + attr.bp_len = len; + arch_bp_generic_fields(brk.type, &attr.bp_type); + + bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, child); + thread->ptrace_bps[0] = bp; + if (IS_ERR(bp)) { + thread->ptrace_bps[0] = NULL; + return PTR_ERR(bp); + } + + return 1; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + + if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) + return -EINVAL; + + if (child->thread.hw_brk.address) + return -ENOSPC; + + if (!ppc_breakpoint_available()) + return -ENODEV; + + child->thread.hw_brk = brk; + + return 1; +} + +long ppc_del_hwdebug(struct task_struct *child, long data) +{ +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int ret = 0; + struct thread_struct *thread = &child->thread; + struct perf_event *bp; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + if (data != 1) + return -EINVAL; + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + bp = thread->ptrace_bps[0]; + if (bp) { + unregister_hw_breakpoint(bp); + thread->ptrace_bps[0] = NULL; + } else { + ret = -ENOENT; + } + return ret; +#else /* CONFIG_HAVE_HW_BREAKPOINT */ + if (child->thread.hw_brk.address == 0) + return -ENOENT; + + child->thread.hw_brk.address = 0; + child->thread.hw_brk.type = 0; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + + return 0; +} diff --git a/arch/powerpc/kernel/ptrace/ptrace-novsx.c b/arch/powerpc/kernel/ptrace/ptrace-novsx.c new file mode 100644 index 00000000000000..b2dc4e92d11ab1 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-novsx.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include + +#include "ptrace-decl.h" + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + */ +int fpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != + offsetof(struct thread_fp_state, fpr[32])); + + flush_fp_to_thread(target); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fp_state, 0, -1); +} + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + * + */ +int fpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != + offsetof(struct thread_fp_state, fpr[32])); + + flush_fp_to_thread(target); + + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fp_state, 0, -1); +} diff --git a/arch/powerpc/kernel/ptrace/ptrace-spe.c b/arch/powerpc/kernel/ptrace/ptrace-spe.c new file mode 100644 index 00000000000000..68b86b4a4be46b --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-spe.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include + +#include "ptrace-decl.h" + +/* + * For get_evrregs/set_evrregs functions 'data' has the following layout: + * + * struct { + * u32 evr[32]; + * u64 acc; + * u32 spefscr; + * } + */ + +int evr_active(struct task_struct *target, const struct user_regset *regset) +{ + flush_spe_to_thread(target); + return target->thread.used_spe ? regset->n : 0; +} + +int evr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + flush_spe_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.evr, + 0, sizeof(target->thread.evr)); + + BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != + offsetof(struct thread_struct, spefscr)); + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.acc, + sizeof(target->thread.evr), -1); + + return ret; +} + +int evr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + flush_spe_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.evr, + 0, sizeof(target->thread.evr)); + + BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != + offsetof(struct thread_struct, spefscr)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.acc, + sizeof(target->thread.evr), -1); + + return ret; +} diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c new file mode 100644 index 00000000000000..d75aff31f6379f --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -0,0 +1,851 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include +#include +#include + +#include "ptrace-decl.h" + +void flush_tmregs_to_thread(struct task_struct *tsk) +{ + /* + * If task is not current, it will have been flushed already to + * it's thread_struct during __switch_to(). + * + * A reclaim flushes ALL the state or if not in TM save TM SPRs + * in the appropriate thread structures from live. + */ + + if (!cpu_has_feature(CPU_FTR_TM) || tsk != current) + return; + + if (MSR_TM_SUSPENDED(mfmsr())) { + tm_reclaim_current(TM_CAUSE_SIGNAL); + } else { + tm_enable(); + tm_save_sprs(&tsk->thread); + } +} + +static unsigned long get_user_ckpt_msr(struct task_struct *task) +{ + return task->thread.ckpt_regs.msr | task->thread.fpexc_mode; +} + +static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr) +{ + task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE; + task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE; + return 0; +} + +static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap) +{ + task->thread.ckpt_regs.trap = trap & 0xfff0; + return 0; +} + +/** + * tm_cgpr_active - get active number of registers in CGPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed GPR category. + */ +int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cgpr_get - get CGPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets transaction checkpointed GPR registers. + * + * When the transaction is active, 'ckpt_regs' holds all the checkpointed + * GPR register values for the current transaction to fall back on if it + * aborts in between. This function gets those checkpointed GPR registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * struct pt_regs ckpt_regs; + * }; + */ +int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs, + 0, offsetof(struct pt_regs, msr)); + if (!ret) { + unsigned long msr = get_user_ckpt_msr(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, + offsetof(struct pt_regs, msr), + offsetof(struct pt_regs, msr) + + sizeof(msr)); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs.orig_gpr3, + offsetof(struct pt_regs, orig_gpr3), + sizeof(struct user_pt_regs)); + if (!ret) + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(struct user_pt_regs), -1); + + return ret; +} + +/* + * tm_cgpr_set - set the CGPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed GPR registers. + * + * When the transaction is active, 'ckpt_regs' holds the checkpointed + * GPR register values for the current transaction to fall back on if it + * aborts in between. This function sets those checkpointed GPR registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * struct pt_regs ckpt_regs; + * }; + */ +int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned long reg; + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs, + 0, PT_MSR * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, + PT_MSR * sizeof(reg), + (PT_MSR + 1) * sizeof(reg)); + if (!ret) + ret = set_user_ckpt_msr(target, reg); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs.orig_gpr3, + PT_ORIG_R3 * sizeof(reg), + (PT_MAX_PUT_REG + 1) * sizeof(reg)); + + if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_MAX_PUT_REG + 1) * sizeof(reg), + PT_TRAP * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, + PT_TRAP * sizeof(reg), + (PT_TRAP + 1) * sizeof(reg)); + if (!ret) + ret = set_user_ckpt_trap(target, reg); + } + + if (!ret) + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); + + return ret; +} + +/** + * tm_cfpr_active - get active number of registers in CFPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed FPR category. + */ +int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cfpr_get - get CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed FPR registers. + * + * When the transaction is active 'ckfp_state' holds the checkpointed + * values for the current transaction to fall back on if it aborts + * in between. This function gets those checkpointed FPR registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + *}; + */ +int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + u64 buf[33]; + int i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + /* copy to local buffer then write that out */ + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_CKFPR(i); + buf[32] = target->thread.ckfp_state.fpscr; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); +} + +/** + * tm_cfpr_set - set CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed FPR registers. + * + * When the transaction is active 'ckfp_state' holds the checkpointed + * FPR register values for the current transaction to fall back on + * if it aborts in between. This function sets these checkpointed + * FPR registers. The userspace interface buffer layout is as follows. + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + *}; + */ +int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[33]; + int i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + for (i = 0; i < 32; i++) + buf[i] = target->thread.TS_CKFPR(i); + buf[32] = target->thread.ckfp_state.fpscr; + + /* copy to local buffer then write that out */ + i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + if (i) + return i; + for (i = 0; i < 32 ; i++) + target->thread.TS_CKFPR(i) = buf[i]; + target->thread.ckfp_state.fpscr = buf[32]; + return 0; +} + +/** + * tm_cvmx_active - get active number of registers in CVMX + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in checkpointed VMX category. + */ +int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cvmx_get - get CMVX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed VMX registers. + * + * When the transaction is active 'ckvr_state' and 'ckvrsave' hold + * the checkpointed values for the current transaction to fall + * back on if it aborts in between. The userspace interface buffer + * layout is as follows. + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + *}; + */ +int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ckvr_state, + 0, 33 * sizeof(vector128)); + if (!ret) { + /* + * Copy out only the low-order word of vrsave. + */ + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.ckvrsave; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, + 33 * sizeof(vector128), -1); + } + + return ret; +} + +/** + * tm_cvmx_set - set CMVX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed VMX registers. + * + * When the transaction is active 'ckvr_state' and 'ckvrsave' hold + * the checkpointed values for the current transaction to fall + * back on if it aborts in between. The userspace interface buffer + * layout is as follows. + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + *}; + */ +int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.ckvr_state, + 0, 33 * sizeof(vector128)); + if (!ret && count > 0) { + /* + * We use only the low-order word of vrsave. + */ + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.ckvrsave; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, + 33 * sizeof(vector128), -1); + if (!ret) + target->thread.ckvrsave = vrsave.word; + } + + return ret; +} + +/** + * tm_cvsx_active - get active number of registers in CVSX + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed VSX category. + */ +int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + flush_vsx_to_thread(target); + return target->thread.used_vsr ? regset->n : 0; +} + +/** + * tm_cvsx_get - get CVSX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed VSX registers. + * + * When the transaction is active 'ckfp_state' holds the checkpointed + * values for the current transaction to fall back on if it aborts + * in between. This function gets those checkpointed VSX registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * u64 vsx[32]; + *}; + */ +int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + + return ret; +} + +/** + * tm_cvsx_set - set CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed VSX registers. + * + * When the transaction is active 'ckfp_state' holds the checkpointed + * VSX register values for the current transaction to fall back on + * if it aborts in between. This function sets these checkpointed + * FPR registers. The userspace interface buffer layout is as follows. + * + * struct data { + * u64 vsx[32]; + *}; + */ +int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + if (!ret) + for (i = 0; i < 32 ; i++) + target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + + return ret; +} + +/** + * tm_spr_active - get active number of registers in TM SPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks the active number of available + * regisers in the transactional memory SPR category. + */ +int tm_spr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + return regset->n; +} + +/** + * tm_spr_get - get the TM related SPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets transactional memory related SPR registers. + * The userspace interface buffer layout is as follows. + * + * struct { + * u64 tm_tfhar; + * u64 tm_texasr; + * u64 tm_tfiar; + * }; + */ +int tm_spr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + /* Build tests */ + BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); + BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); + BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + /* Flush the states */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + /* TFHAR register */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfhar, 0, sizeof(u64)); + + /* TEXASR register */ + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_texasr, sizeof(u64), + 2 * sizeof(u64)); + + /* TFIAR register */ + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfiar, + 2 * sizeof(u64), 3 * sizeof(u64)); + return ret; +} + +/** + * tm_spr_set - set the TM related SPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets transactional memory related SPR registers. + * The userspace interface buffer layout is as follows. + * + * struct { + * u64 tm_tfhar; + * u64 tm_texasr; + * u64 tm_tfiar; + * }; + */ +int tm_spr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + /* Build tests */ + BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); + BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); + BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + /* Flush the states */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + /* TFHAR register */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfhar, 0, sizeof(u64)); + + /* TEXASR register */ + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_texasr, sizeof(u64), + 2 * sizeof(u64)); + + /* TFIAR register */ + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfiar, + 2 * sizeof(u64), 3 * sizeof(u64)); + return ret; +} + +int tm_tar_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + +int tm_tar_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tar, 0, sizeof(u64)); + return ret; +} + +int tm_tar_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tar, 0, sizeof(u64)); + return ret; +} + +int tm_ppr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + + +int tm_ppr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_ppr, 0, sizeof(u64)); + return ret; +} + +int tm_ppr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_ppr, 0, sizeof(u64)); + return ret; +} + +int tm_dscr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + +int tm_dscr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_dscr, 0, sizeof(u64)); + return ret; +} + +int tm_dscr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_dscr, 0, sizeof(u64)); + return ret; +} + +int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.ckpt_regs.gpr[0]); +} + +int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.ckpt_regs.gpr[0]); +} diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c new file mode 100644 index 00000000000000..15e3b79b639528 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -0,0 +1,904 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include + +#include "ptrace-decl.h" + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define STR(s) #s /* convert to string */ +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define GPR_OFFSET_NAME(num) \ + {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \ + {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { + GPR_OFFSET_NAME(0), + GPR_OFFSET_NAME(1), + GPR_OFFSET_NAME(2), + GPR_OFFSET_NAME(3), + GPR_OFFSET_NAME(4), + GPR_OFFSET_NAME(5), + GPR_OFFSET_NAME(6), + GPR_OFFSET_NAME(7), + GPR_OFFSET_NAME(8), + GPR_OFFSET_NAME(9), + GPR_OFFSET_NAME(10), + GPR_OFFSET_NAME(11), + GPR_OFFSET_NAME(12), + GPR_OFFSET_NAME(13), + GPR_OFFSET_NAME(14), + GPR_OFFSET_NAME(15), + GPR_OFFSET_NAME(16), + GPR_OFFSET_NAME(17), + GPR_OFFSET_NAME(18), + GPR_OFFSET_NAME(19), + GPR_OFFSET_NAME(20), + GPR_OFFSET_NAME(21), + GPR_OFFSET_NAME(22), + GPR_OFFSET_NAME(23), + GPR_OFFSET_NAME(24), + GPR_OFFSET_NAME(25), + GPR_OFFSET_NAME(26), + GPR_OFFSET_NAME(27), + GPR_OFFSET_NAME(28), + GPR_OFFSET_NAME(29), + GPR_OFFSET_NAME(30), + GPR_OFFSET_NAME(31), + REG_OFFSET_NAME(nip), + REG_OFFSET_NAME(msr), + REG_OFFSET_NAME(ctr), + REG_OFFSET_NAME(link), + REG_OFFSET_NAME(xer), + REG_OFFSET_NAME(ccr), +#ifdef CONFIG_PPC64 + REG_OFFSET_NAME(softe), +#else + REG_OFFSET_NAME(mq), +#endif + REG_OFFSET_NAME(trap), + REG_OFFSET_NAME(dar), + REG_OFFSET_NAME(dsisr), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +static unsigned long get_user_msr(struct task_struct *task) +{ + return task->thread.regs->msr | task->thread.fpexc_mode; +} + +static int set_user_msr(struct task_struct *task, unsigned long msr) +{ + task->thread.regs->msr &= ~MSR_DEBUGCHANGE; + task->thread.regs->msr |= msr & MSR_DEBUGCHANGE; + return 0; +} + +#ifdef CONFIG_PPC64 +static int get_user_dscr(struct task_struct *task, unsigned long *data) +{ + *data = task->thread.dscr; + return 0; +} + +static int set_user_dscr(struct task_struct *task, unsigned long dscr) +{ + task->thread.dscr = dscr; + task->thread.dscr_inherit = 1; + return 0; +} +#else +static int get_user_dscr(struct task_struct *task, unsigned long *data) +{ + return -EIO; +} + +static int set_user_dscr(struct task_struct *task, unsigned long dscr) +{ + return -EIO; +} +#endif + +/* + * We prevent mucking around with the reserved area of trap + * which are used internally by the kernel. + */ +static int set_user_trap(struct task_struct *task, unsigned long trap) +{ + task->thread.regs->trap = trap & 0xfff0; + return 0; +} + +/* + * Get contents of register REGNO in task TASK. + */ +int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data) +{ + unsigned int regs_max; + + if (task->thread.regs == NULL || !data) + return -EIO; + + if (regno == PT_MSR) { + *data = get_user_msr(task); + return 0; + } + + if (regno == PT_DSCR) + return get_user_dscr(task, data); + + /* + * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is + * no more used as a flag, lets force usr to alway see the softe value as 1 + * which means interrupts are not soft disabled. + */ + if (IS_ENABLED(CONFIG_PPC64) && regno == PT_SOFTE) { + *data = 1; + return 0; + } + + regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long); + if (regno < regs_max) { + regno = array_index_nospec(regno, regs_max); + *data = ((unsigned long *)task->thread.regs)[regno]; + return 0; + } + + return -EIO; +} + +/* + * Write contents of register REGNO in task TASK. + */ +int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) +{ + if (task->thread.regs == NULL) + return -EIO; + + if (regno == PT_MSR) + return set_user_msr(task, data); + if (regno == PT_TRAP) + return set_user_trap(task, data); + if (regno == PT_DSCR) + return set_user_dscr(task, data); + + if (regno <= PT_MAX_PUT_REG) { + regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1); + ((unsigned long *)task->thread.regs)[regno] = data; + return 0; + } + return -EIO; +} + +static int gpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int i, ret; + + if (target->thread.regs == NULL) + return -EIO; + + if (!FULL_REGS(target->thread.regs)) { + /* We have a partial register set. Fill 14-31 with bogus values */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + target->thread.regs, + 0, offsetof(struct pt_regs, msr)); + if (!ret) { + unsigned long msr = get_user_msr(target); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, + offsetof(struct pt_regs, msr), + offsetof(struct pt_regs, msr) + + sizeof(msr)); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->orig_gpr3, + offsetof(struct pt_regs, orig_gpr3), + sizeof(struct user_pt_regs)); + if (!ret) + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(struct user_pt_regs), -1); + + return ret; +} + +static int gpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + unsigned long reg; + int ret; + + if (target->thread.regs == NULL) + return -EIO; + + CHECK_FULL_REGS(target->thread.regs); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.regs, + 0, PT_MSR * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, + PT_MSR * sizeof(reg), + (PT_MSR + 1) * sizeof(reg)); + if (!ret) + ret = set_user_msr(target, reg); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->orig_gpr3, + PT_ORIG_R3 * sizeof(reg), + (PT_MAX_PUT_REG + 1) * sizeof(reg)); + + if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_MAX_PUT_REG + 1) * sizeof(reg), + PT_TRAP * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, + PT_TRAP * sizeof(reg), + (PT_TRAP + 1) * sizeof(reg)); + if (!ret) + ret = set_user_trap(target, reg); + } + + if (!ret) + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); + + return ret; +} + +#ifdef CONFIG_PPC64 +static int ppr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->ppr, 0, sizeof(u64)); +} + +static int ppr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->ppr, 0, sizeof(u64)); +} + +static int dscr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.dscr, 0, sizeof(u64)); +} +static int dscr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.dscr, 0, sizeof(u64)); +} +#endif +#ifdef CONFIG_PPC_BOOK3S_64 +static int tar_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tar, 0, sizeof(u64)); +} +static int tar_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tar, 0, sizeof(u64)); +} + +static int ebb_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (target->thread.used_ebb) + return regset->n; + + return 0; +} + +static int ebb_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + /* Build tests */ + BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); + BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (!target->thread.used_ebb) + return -ENODATA; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ebbrr, + 0, 3 * sizeof(unsigned long)); +} + +static int ebb_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + int ret = 0; + + /* Build tests */ + BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); + BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (target->thread.used_ebb) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.ebbrr, + 0, sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ebbhr, sizeof(unsigned long), + 2 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.bescr, 2 * sizeof(unsigned long), + 3 * sizeof(unsigned long)); + + return ret; +} +static int pmu_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + return regset->n; +} + +static int pmu_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + /* Build tests */ + BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); + BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); + BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); + BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.siar, + 0, 5 * sizeof(unsigned long)); +} + +static int pmu_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + int ret = 0; + + /* Build tests */ + BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); + BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); + BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); + BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.siar, + 0, sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.sdar, sizeof(unsigned long), + 2 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.sier, 2 * sizeof(unsigned long), + 3 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.mmcr2, 3 * sizeof(unsigned long), + 4 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.mmcr0, 4 * sizeof(unsigned long), + 5 * sizeof(unsigned long)); + return ret; +} +#endif + +#ifdef CONFIG_PPC_MEM_KEYS +static int pkey_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!arch_pkeys_enabled()) + return -ENODEV; + + return regset->n; +} + +static int pkey_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); + BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); + + if (!arch_pkeys_enabled()) + return -ENODEV; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.amr, + 0, ELF_NPKEY * sizeof(unsigned long)); +} + +static int pkey_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + u64 new_amr; + int ret; + + if (!arch_pkeys_enabled()) + return -ENODEV; + + /* Only the AMR can be set from userspace */ + if (pos != 0 || count != sizeof(new_amr)) + return -EINVAL; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &new_amr, 0, sizeof(new_amr)); + if (ret) + return ret; + + /* UAMOR determines which bits of the AMR can be set from userspace. */ + target->thread.amr = (new_amr & target->thread.uamor) | + (target->thread.amr & ~target->thread.uamor); + + return 0; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + +static const struct user_regset native_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .get = gpr_get, .set = gpr_set + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .get = fpr_get, .set = fpr_set + }, +#ifdef CONFIG_ALTIVEC + [REGSET_VMX] = { + .core_note_type = NT_PPC_VMX, .n = 34, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = vr_active, .get = vr_get, .set = vr_set + }, +#endif +#ifdef CONFIG_VSX + [REGSET_VSX] = { + .core_note_type = NT_PPC_VSX, .n = 32, + .size = sizeof(double), .align = sizeof(double), + .active = vsr_active, .get = vsr_get, .set = vsr_set + }, +#endif +#ifdef CONFIG_SPE + [REGSET_SPE] = { + .core_note_type = NT_PPC_SPE, .n = 35, + .size = sizeof(u32), .align = sizeof(u32), + .active = evr_active, .get = evr_get, .set = evr_set + }, +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + [REGSET_TM_CGPR] = { + .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set + }, + [REGSET_TM_CFPR] = { + .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + }, + [REGSET_TM_CVMX] = { + .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + }, + [REGSET_TM_CVSX] = { + .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + }, + [REGSET_TM_SPR] = { + .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + }, + [REGSET_TM_CTAR] = { + .core_note_type = NT_PPC_TM_CTAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + }, + [REGSET_TM_CPPR] = { + .core_note_type = NT_PPC_TM_CPPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + }, + [REGSET_TM_CDSCR] = { + .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + }, +#endif +#ifdef CONFIG_PPC64 + [REGSET_PPR] = { + .core_note_type = NT_PPC_PPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = ppr_get, .set = ppr_set + }, + [REGSET_DSCR] = { + .core_note_type = NT_PPC_DSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = dscr_get, .set = dscr_set + }, +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + [REGSET_TAR] = { + .core_note_type = NT_PPC_TAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = tar_get, .set = tar_set + }, + [REGSET_EBB] = { + .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + .size = sizeof(u64), .align = sizeof(u64), + .active = ebb_active, .get = ebb_get, .set = ebb_set + }, + [REGSET_PMR] = { + .core_note_type = NT_PPC_PMU, .n = ELF_NPMU, + .size = sizeof(u64), .align = sizeof(u64), + .active = pmu_active, .get = pmu_get, .set = pmu_set + }, +#endif +#ifdef CONFIG_PPC_MEM_KEYS + [REGSET_PKEY] = { + .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, + .size = sizeof(u64), .align = sizeof(u64), + .active = pkey_active, .get = pkey_get, .set = pkey_set + }, +#endif +}; + +const struct user_regset_view user_ppc_native_view = { + .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI, + .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) +}; + +#include + +int gpr32_get_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf, + unsigned long *regs) +{ + compat_ulong_t *k = kbuf; + compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < PT_MSR; --count) + *k++ = regs[pos++]; + else + for (; count > 0 && pos < PT_MSR; --count) + if (__put_user((compat_ulong_t)regs[pos++], u++)) + return -EFAULT; + + if (count > 0 && pos == PT_MSR) { + reg = get_user_msr(target); + if (kbuf) + *k++ = reg; + else if (__put_user(reg, u++)) + return -EFAULT; + ++pos; + --count; + } + + if (kbuf) + for (; count > 0 && pos < PT_REGS_COUNT; --count) + *k++ = regs[pos++]; + else + for (; count > 0 && pos < PT_REGS_COUNT; --count) + if (__put_user((compat_ulong_t)regs[pos++], u++)) + return -EFAULT; + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + PT_REGS_COUNT * sizeof(reg), -1); +} + +int gpr32_set_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + unsigned long *regs) +{ + const compat_ulong_t *k = kbuf; + const compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < PT_MSR; --count) + regs[pos++] = *k++; + else + for (; count > 0 && pos < PT_MSR; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + regs[pos++] = reg; + } + + + if (count > 0 && pos == PT_MSR) { + if (kbuf) + reg = *k++; + else if (__get_user(reg, u++)) + return -EFAULT; + set_user_msr(target, reg); + ++pos; + --count; + } + + if (kbuf) { + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) + regs[pos++] = *k++; + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + ++k; + } else { + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + regs[pos++] = reg; + } + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + if (__get_user(reg, u++)) + return -EFAULT; + } + + if (count > 0 && pos == PT_TRAP) { + if (kbuf) + reg = *k++; + else if (__get_user(reg, u++)) + return -EFAULT; + set_user_trap(target, reg); + ++pos; + --count; + } + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); +} + +static int gpr32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int i; + + if (target->thread.regs == NULL) + return -EIO; + + if (!FULL_REGS(target->thread.regs)) { + /* + * We have a partial register set. + * Fill 14-31 with bogus values. + */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } + return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.regs->gpr[0]); +} + +static int gpr32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + if (target->thread.regs == NULL) + return -EIO; + + CHECK_FULL_REGS(target->thread.regs); + return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.regs->gpr[0]); +} + +/* + * These are the regset flavors matching the CONFIG_PPC32 native set. + */ +static const struct user_regset compat_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), + .get = gpr32_get, .set = gpr32_set + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .get = fpr_get, .set = fpr_set + }, +#ifdef CONFIG_ALTIVEC + [REGSET_VMX] = { + .core_note_type = NT_PPC_VMX, .n = 34, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = vr_active, .get = vr_get, .set = vr_set + }, +#endif +#ifdef CONFIG_SPE + [REGSET_SPE] = { + .core_note_type = NT_PPC_SPE, .n = 35, + .size = sizeof(u32), .align = sizeof(u32), + .active = evr_active, .get = evr_get, .set = evr_set + }, +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + [REGSET_TM_CGPR] = { + .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .active = tm_cgpr_active, + .get = tm_cgpr32_get, .set = tm_cgpr32_set + }, + [REGSET_TM_CFPR] = { + .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + }, + [REGSET_TM_CVMX] = { + .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + }, + [REGSET_TM_CVSX] = { + .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + }, + [REGSET_TM_SPR] = { + .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + }, + [REGSET_TM_CTAR] = { + .core_note_type = NT_PPC_TM_CTAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + }, + [REGSET_TM_CPPR] = { + .core_note_type = NT_PPC_TM_CPPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + }, + [REGSET_TM_CDSCR] = { + .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + }, +#endif +#ifdef CONFIG_PPC64 + [REGSET_PPR] = { + .core_note_type = NT_PPC_PPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = ppr_get, .set = ppr_set + }, + [REGSET_DSCR] = { + .core_note_type = NT_PPC_DSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = dscr_get, .set = dscr_set + }, +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + [REGSET_TAR] = { + .core_note_type = NT_PPC_TAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = tar_get, .set = tar_set + }, + [REGSET_EBB] = { + .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + .size = sizeof(u64), .align = sizeof(u64), + .active = ebb_active, .get = ebb_get, .set = ebb_set + }, +#endif +}; + +static const struct user_regset_view user_ppc_compat_view = { + .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI, + .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets) +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + if (IS_ENABLED(CONFIG_PPC64) && test_tsk_thread_flag(task, TIF_32BIT)) + return &user_ppc_compat_view; + return &user_ppc_native_view; +} diff --git a/arch/powerpc/kernel/ptrace/ptrace-vsx.c b/arch/powerpc/kernel/ptrace/ptrace-vsx.c new file mode 100644 index 00000000000000..d53466d49cc0ea --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-vsx.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include + +#include "ptrace-decl.h" + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + */ +int fpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + u64 buf[33]; + int i; + + flush_fp_to_thread(target); + + /* copy to local buffer then write that out */ + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + buf[32] = target->thread.fp_state.fpscr; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); +} + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + * + */ +int fpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[33]; + int i; + + flush_fp_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + buf[32] = target->thread.fp_state.fpscr; + + /* copy to local buffer then write that out */ + i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + if (i) + return i; + + for (i = 0; i < 32 ; i++) + target->thread.TS_FPR(i) = buf[i]; + target->thread.fp_state.fpscr = buf[32]; + return 0; +} + +/* + * Currently to set and and get all the vsx state, you need to call + * the fp and VMX calls as well. This only get/sets the lower 32 + * 128bit VSX registers. + */ + +int vsr_active(struct task_struct *target, const struct user_regset *regset) +{ + flush_vsx_to_thread(target); + return target->thread.used_vsr ? regset->n : 0; +} + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last + * checkpointed value of all FPR registers for the current + * transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 vsx[32]; + * }; + */ +int vsr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + + return ret; +} + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last + * checkpointed value of all FPR registers for the current + * transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 vsx[32]; + * }; + */ +int vsr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + if (!ret) + for (i = 0; i < 32 ; i++) + target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + + return ret; +} diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c new file mode 100644 index 00000000000000..f6e51be47c6e4d --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -0,0 +1,481 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/m68k/kernel/ptrace.c" + * Copyright (C) 1994 by Hamish Macdonald + * Taken from linux/kernel/ptrace.c and modified for M680x0. + * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds + * + * Modified by Cort Dougan (cort@hq.fsmlabs.com) + * and Paul Mackerras (paulus@samba.org). + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file README.legal in the main directory of + * this archive for more details. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +#include "ptrace-decl.h" + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure single step bits etc are not set. + */ +void ptrace_disable(struct task_struct *child) +{ + /* make sure the single step bit is not set. */ + user_disable_single_step(child); +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret = -EPERM; + void __user *datavp = (void __user *) data; + unsigned long __user *datalp = datavp; + + switch (request) { + /* read the word at location addr in the USER area. */ + case PTRACE_PEEKUSR: { + unsigned long index, tmp; + + ret = -EIO; + /* convert to index and check */ +#ifdef CONFIG_PPC32 + index = addr >> 2; + if ((addr & 3) || (index > PT_FPSCR) + || (child->thread.regs == NULL)) +#else + index = addr >> 3; + if ((addr & 7) || (index > PT_FPSCR)) +#endif + break; + + CHECK_FULL_REGS(child->thread.regs); + if (index < PT_FPR0) { + ret = ptrace_get_reg(child, (int) index, &tmp); + if (ret) + break; + } else { + unsigned int fpidx = index - PT_FPR0; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + memcpy(&tmp, &child->thread.TS_FPR(fpidx), + sizeof(long)); + else + tmp = child->thread.fp_state.fpscr; + } + ret = put_user(tmp, datalp); + break; + } + + /* write the word at location addr in the USER area */ + case PTRACE_POKEUSR: { + unsigned long index; + + ret = -EIO; + /* convert to index and check */ +#ifdef CONFIG_PPC32 + index = addr >> 2; + if ((addr & 3) || (index > PT_FPSCR) + || (child->thread.regs == NULL)) +#else + index = addr >> 3; + if ((addr & 7) || (index > PT_FPSCR)) +#endif + break; + + CHECK_FULL_REGS(child->thread.regs); + if (index < PT_FPR0) { + ret = ptrace_put_reg(child, index, data); + } else { + unsigned int fpidx = index - PT_FPR0; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + memcpy(&child->thread.TS_FPR(fpidx), &data, + sizeof(long)); + else + child->thread.fp_state.fpscr = data; + ret = 0; + } + break; + } + + case PPC_PTRACE_GETHWDBGINFO: { + struct ppc_debug_info dbginfo; + + ppc_gethwdinfo(&dbginfo); + + if (copy_to_user(datavp, &dbginfo, + sizeof(struct ppc_debug_info))) + return -EFAULT; + return 0; + } + + case PPC_PTRACE_SETHWDEBUG: { + struct ppc_hw_breakpoint bp_info; + + if (copy_from_user(&bp_info, datavp, + sizeof(struct ppc_hw_breakpoint))) + return -EFAULT; + return ppc_set_hwdebug(child, &bp_info); + } + + case PPC_PTRACE_DELHWDEBUG: { + ret = ppc_del_hwdebug(child, data); + break; + } + + case PTRACE_GET_DEBUGREG: + ret = ptrace_get_debugreg(child, addr, datalp); + break; + + case PTRACE_SET_DEBUGREG: + ret = ptrace_set_debugreg(child, addr, data); + break; + +#ifdef CONFIG_PPC64 + case PTRACE_GETREGS64: +#endif + case PTRACE_GETREGS: /* Get all pt_regs from the child. */ + return copy_regset_to_user(child, &user_ppc_native_view, + REGSET_GPR, + 0, sizeof(struct user_pt_regs), + datavp); + +#ifdef CONFIG_PPC64 + case PTRACE_SETREGS64: +#endif + case PTRACE_SETREGS: /* Set all gp regs in the child. */ + return copy_regset_from_user(child, &user_ppc_native_view, + REGSET_GPR, + 0, sizeof(struct user_pt_regs), + datavp); + + case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */ + return copy_regset_to_user(child, &user_ppc_native_view, + REGSET_FPR, + 0, sizeof(elf_fpregset_t), + datavp); + + case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */ + return copy_regset_from_user(child, &user_ppc_native_view, + REGSET_FPR, + 0, sizeof(elf_fpregset_t), + datavp); + +#ifdef CONFIG_ALTIVEC + case PTRACE_GETVRREGS: + return copy_regset_to_user(child, &user_ppc_native_view, + REGSET_VMX, + 0, (33 * sizeof(vector128) + + sizeof(u32)), + datavp); + + case PTRACE_SETVRREGS: + return copy_regset_from_user(child, &user_ppc_native_view, + REGSET_VMX, + 0, (33 * sizeof(vector128) + + sizeof(u32)), + datavp); +#endif +#ifdef CONFIG_VSX + case PTRACE_GETVSRREGS: + return copy_regset_to_user(child, &user_ppc_native_view, + REGSET_VSX, + 0, 32 * sizeof(double), + datavp); + + case PTRACE_SETVSRREGS: + return copy_regset_from_user(child, &user_ppc_native_view, + REGSET_VSX, + 0, 32 * sizeof(double), + datavp); +#endif +#ifdef CONFIG_SPE + case PTRACE_GETEVRREGS: + /* Get the child spe register state. */ + return copy_regset_to_user(child, &user_ppc_native_view, + REGSET_SPE, 0, 35 * sizeof(u32), + datavp); + + case PTRACE_SETEVRREGS: + /* Set the child spe register state. */ + return copy_regset_from_user(child, &user_ppc_native_view, + REGSET_SPE, 0, 35 * sizeof(u32), + datavp); +#endif + + default: + ret = ptrace_request(child, request, addr, data); + break; + } + return ret; +} + +#ifdef CONFIG_SECCOMP +static int do_seccomp(struct pt_regs *regs) +{ + if (!test_thread_flag(TIF_SECCOMP)) + return 0; + + /* + * The ABI we present to seccomp tracers is that r3 contains + * the syscall return value and orig_gpr3 contains the first + * syscall parameter. This is different to the ptrace ABI where + * both r3 and orig_gpr3 contain the first syscall parameter. + */ + regs->gpr[3] = -ENOSYS; + + /* + * We use the __ version here because we have already checked + * TIF_SECCOMP. If this fails, there is nothing left to do, we + * have already loaded -ENOSYS into r3, or seccomp has put + * something else in r3 (via SECCOMP_RET_ERRNO/TRACE). + */ + if (__secure_computing(NULL)) + return -1; + + /* + * The syscall was allowed by seccomp, restore the register + * state to what audit expects. + * Note that we use orig_gpr3, which means a seccomp tracer can + * modify the first syscall parameter (in orig_gpr3) and also + * allow the syscall to proceed. + */ + regs->gpr[3] = regs->orig_gpr3; + + return 0; +} +#else +static inline int do_seccomp(struct pt_regs *regs) { return 0; } +#endif /* CONFIG_SECCOMP */ + +/** + * do_syscall_trace_enter() - Do syscall tracing on kernel entry. + * @regs: the pt_regs of the task to trace (current) + * + * Performs various types of tracing on syscall entry. This includes seccomp, + * ptrace, syscall tracepoints and audit. + * + * The pt_regs are potentially visible to userspace via ptrace, so their + * contents is ABI. + * + * One or more of the tracers may modify the contents of pt_regs, in particular + * to modify arguments or even the syscall number itself. + * + * It's also possible that a tracer can choose to reject the system call. In + * that case this function will return an illegal syscall number, and will put + * an appropriate return value in regs->r3. + * + * Return: the (possibly changed) syscall number. + */ +long do_syscall_trace_enter(struct pt_regs *regs) +{ + u32 flags; + + user_exit(); + + flags = READ_ONCE(current_thread_info()->flags) & + (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); + + if (flags) { + int rc = tracehook_report_syscall_entry(regs); + + if (unlikely(flags & _TIF_SYSCALL_EMU)) { + /* + * A nonzero return code from + * tracehook_report_syscall_entry() tells us to prevent + * the syscall execution, but we are not going to + * execute it anyway. + * + * Returning -1 will skip the syscall execution. We want + * to avoid clobbering any registers, so we don't goto + * the skip label below. + */ + return -1; + } + + if (rc) { + /* + * The tracer decided to abort the syscall. Note that + * the tracer may also just change regs->gpr[0] to an + * invalid syscall number, that is handled below on the + * exit path. + */ + goto skip; + } + } + + /* Run seccomp after ptrace; allow it to set gpr[3]. */ + if (do_seccomp(regs)) + return -1; + + /* Avoid trace and audit when syscall is invalid. */ + if (regs->gpr[0] >= NR_syscalls) + goto skip; + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->gpr[0]); + + if (!is_32bit_task()) + audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4], + regs->gpr[5], regs->gpr[6]); + else + audit_syscall_entry(regs->gpr[0], + regs->gpr[3] & 0xffffffff, + regs->gpr[4] & 0xffffffff, + regs->gpr[5] & 0xffffffff, + regs->gpr[6] & 0xffffffff); + + /* Return the possibly modified but valid syscall number */ + return regs->gpr[0]; + +skip: + /* + * If we are aborting explicitly, or if the syscall number is + * now invalid, set the return value to -ENOSYS. + */ + regs->gpr[3] = -ENOSYS; + return -1; +} + +void do_syscall_trace_leave(struct pt_regs *regs) +{ + int step; + + audit_syscall_exit(regs); + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->result); + + step = test_thread_flag(TIF_SINGLESTEP); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, step); + + user_enter(); +} + +void __init pt_regs_check(void); + +/* + * Dummy function, its purpose is to break the build if struct pt_regs and + * struct user_pt_regs don't match. + */ +void __init pt_regs_check(void) +{ + BUILD_BUG_ON(offsetof(struct pt_regs, gpr) != + offsetof(struct user_pt_regs, gpr)); + BUILD_BUG_ON(offsetof(struct pt_regs, nip) != + offsetof(struct user_pt_regs, nip)); + BUILD_BUG_ON(offsetof(struct pt_regs, msr) != + offsetof(struct user_pt_regs, msr)); + BUILD_BUG_ON(offsetof(struct pt_regs, msr) != + offsetof(struct user_pt_regs, msr)); + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct user_pt_regs, orig_gpr3)); + BUILD_BUG_ON(offsetof(struct pt_regs, ctr) != + offsetof(struct user_pt_regs, ctr)); + BUILD_BUG_ON(offsetof(struct pt_regs, link) != + offsetof(struct user_pt_regs, link)); + BUILD_BUG_ON(offsetof(struct pt_regs, xer) != + offsetof(struct user_pt_regs, xer)); + BUILD_BUG_ON(offsetof(struct pt_regs, ccr) != + offsetof(struct user_pt_regs, ccr)); +#ifdef __powerpc64__ + BUILD_BUG_ON(offsetof(struct pt_regs, softe) != + offsetof(struct user_pt_regs, softe)); +#else + BUILD_BUG_ON(offsetof(struct pt_regs, mq) != + offsetof(struct user_pt_regs, mq)); +#endif + BUILD_BUG_ON(offsetof(struct pt_regs, trap) != + offsetof(struct user_pt_regs, trap)); + BUILD_BUG_ON(offsetof(struct pt_regs, dar) != + offsetof(struct user_pt_regs, dar)); + BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) != + offsetof(struct user_pt_regs, dsisr)); + BUILD_BUG_ON(offsetof(struct pt_regs, result) != + offsetof(struct user_pt_regs, result)); + + BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs)); + + // Now check that the pt_regs offsets match the uapi #defines + #define CHECK_REG(_pt, _reg) \ + BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \ + sizeof(unsigned long))); + + CHECK_REG(PT_R0, gpr[0]); + CHECK_REG(PT_R1, gpr[1]); + CHECK_REG(PT_R2, gpr[2]); + CHECK_REG(PT_R3, gpr[3]); + CHECK_REG(PT_R4, gpr[4]); + CHECK_REG(PT_R5, gpr[5]); + CHECK_REG(PT_R6, gpr[6]); + CHECK_REG(PT_R7, gpr[7]); + CHECK_REG(PT_R8, gpr[8]); + CHECK_REG(PT_R9, gpr[9]); + CHECK_REG(PT_R10, gpr[10]); + CHECK_REG(PT_R11, gpr[11]); + CHECK_REG(PT_R12, gpr[12]); + CHECK_REG(PT_R13, gpr[13]); + CHECK_REG(PT_R14, gpr[14]); + CHECK_REG(PT_R15, gpr[15]); + CHECK_REG(PT_R16, gpr[16]); + CHECK_REG(PT_R17, gpr[17]); + CHECK_REG(PT_R18, gpr[18]); + CHECK_REG(PT_R19, gpr[19]); + CHECK_REG(PT_R20, gpr[20]); + CHECK_REG(PT_R21, gpr[21]); + CHECK_REG(PT_R22, gpr[22]); + CHECK_REG(PT_R23, gpr[23]); + CHECK_REG(PT_R24, gpr[24]); + CHECK_REG(PT_R25, gpr[25]); + CHECK_REG(PT_R26, gpr[26]); + CHECK_REG(PT_R27, gpr[27]); + CHECK_REG(PT_R28, gpr[28]); + CHECK_REG(PT_R29, gpr[29]); + CHECK_REG(PT_R30, gpr[30]); + CHECK_REG(PT_R31, gpr[31]); + CHECK_REG(PT_NIP, nip); + CHECK_REG(PT_MSR, msr); + CHECK_REG(PT_ORIG_R3, orig_gpr3); + CHECK_REG(PT_CTR, ctr); + CHECK_REG(PT_LNK, link); + CHECK_REG(PT_XER, xer); + CHECK_REG(PT_CCR, ccr); +#ifdef CONFIG_PPC64 + CHECK_REG(PT_SOFTE, softe); +#else + CHECK_REG(PT_MQ, mq); +#endif + CHECK_REG(PT_TRAP, trap); + CHECK_REG(PT_DAR, dar); + CHECK_REG(PT_DSISR, dsisr); + CHECK_REG(PT_RESULT, result); + #undef CHECK_REG + + BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long)); + + /* + * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the + * real registers. + */ + BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long)); +} diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c similarity index 96% rename from arch/powerpc/kernel/ptrace32.c rename to arch/powerpc/kernel/ptrace/ptrace32.c index f37eb53de1a1f1..7976ddf29c0eae 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace/ptrace32.c @@ -17,21 +17,10 @@ * this archive for more details. */ -#include -#include -#include -#include -#include #include #include -#include -#include -#include #include -#include -#include -#include #include /* diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 7f8c890360fe4c..f9c0d888ce8a5c 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -787,8 +787,7 @@ EXPORT_SYMBOL(powerpc_debugfs_root); static int powerpc_debugfs_init(void) { powerpc_debugfs_root = debugfs_create_dir("powerpc", NULL); - - return powerpc_debugfs_root == NULL; + return 0; } arch_initcall(powerpc_debugfs_init); #endif diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 2dd0d9cb5a2086..2ec835574cc940 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -8,6 +8,12 @@ #ifndef __ARCH_POWERPC_KERNEL_SETUP_H #define __ARCH_POWERPC_KERNEL_SETUP_H +#ifdef CONFIG_CC_IS_CLANG +#define __nostackprotector +#else +#define __nostackprotector __attribute__((__optimize__("no-stack-protector"))) +#endif + void initialize_cache_info(void); void irqstack_early_init(void); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 5b49b26eb15419..305ca89d856fee 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -58,7 +58,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid_phys); int smp_hw_index[NR_CPUS]; EXPORT_SYMBOL(smp_hw_index); -unsigned long ISA_DMA_THRESHOLD; unsigned int DMA_MODE_READ; unsigned int DMA_MODE_WRITE; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e05e6dd67ae6b9..438a9befce4159 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -279,24 +279,42 @@ void __init record_spr_defaults(void) * device-tree is not accessible via normal means at this point. */ -void __init early_setup(unsigned long dt_ptr) +void __init __nostackprotector early_setup(unsigned long dt_ptr) { static __initdata struct paca_struct boot_paca; /* -------- printk is _NOT_ safe to use here ! ------- */ - /* Try new device tree based feature discovery ... */ - if (!dt_cpu_ftrs_init(__va(dt_ptr))) - /* Otherwise use the old style CPU table */ - identify_cpu(0, mfspr(SPRN_PVR)); - - /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ + /* + * Assume we're on cpu 0 for now. + * + * We need to load a PACA very early for a few reasons. + * + * The stack protector canary is stored in the paca, so as soon as we + * call any stack protected code we need r13 pointing somewhere valid. + * + * If we are using kcov it will call in_task() in its instrumentation, + * which relies on the current task from the PACA. + * + * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as + * printk(), which can trigger both stack protector and kcov. + * + * percpu variables and spin locks also use the paca. + * + * So set up a temporary paca. It will be replaced below once we know + * what CPU we are on. + */ initialise_paca(&boot_paca, 0); setup_paca(&boot_paca); fixup_boot_paca(); /* -------- printk is now safe to use ------- */ + /* Try new device tree based feature discovery ... */ + if (!dt_cpu_ftrs_init(__va(dt_ptr))) + /* Otherwise use the old style CPU table */ + identify_cpu(0, mfspr(SPRN_PVR)); + /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 8004336858884b..d396efca406801 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -10,8 +10,6 @@ #ifndef _POWERPC_ARCH_SIGNAL_H #define _POWERPC_ARCH_SIGNAL_H -extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); - extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, size_t frame_size, int is_32); diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 84ed2e77ef9c3f..adfde59cf4ba8a 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -473,8 +473,10 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, err |= __get_user(tsk->thread.ckpt_regs.ccr, &sc->gp_regs[PT_CCR]); + /* Don't allow userspace to set the trap value */ + regs->trap = 0; + /* These regs are not checkpointed; they can go in 'regs'. */ - err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]); err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ea6adbf6a22119..6d2a3a3666f09f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1185,10 +1185,30 @@ static inline void add_cpu_to_smallcore_masks(int cpu) } } +int get_physical_package_id(int cpu) +{ + int pkg_id = cpu_to_chip_id(cpu); + + /* + * If the platform is PowerNV or Guest on KVM, ibm,chip-id is + * defined. Hence we would return the chip-id as the result of + * get_physical_package_id. + */ + if (pkg_id == -1 && firmware_has_feature(FW_FEATURE_LPAR) && + IS_ENABLED(CONFIG_PPC_SPLPAR)) { + struct device_node *np = of_get_cpu_node(cpu, NULL); + pkg_id = of_node_to_nid(np); + of_node_put(np); + } + + return pkg_id; +} +EXPORT_SYMBOL_GPL(get_physical_package_id); + static void add_cpu_to_masks(int cpu) { int first_thread = cpu_first_thread_sibling(cpu); - int chipid = cpu_to_chip_id(cpu); + int pkg_id = get_physical_package_id(cpu); int i; /* @@ -1217,11 +1237,11 @@ static void add_cpu_to_masks(int cpu) for_each_cpu(i, cpu_l2_cache_mask(cpu)) set_cpus_related(cpu, i, cpu_core_mask); - if (chipid == -1) + if (pkg_id == -1) return; for_each_cpu(i, cpu_online_mask) - if (cpu_to_chip_id(i) == chipid) + if (get_physical_package_id(i) == pkg_id) set_cpus_related(cpu, i, cpu_core_mask); } @@ -1359,11 +1379,6 @@ void __init smp_cpus_done(unsigned int max_cpus) if (smp_ops && smp_ops->bringup_done) smp_ops->bringup_done(); - /* - * On a shared LPAR, associativity needs to be requested. - * Hence, get numa topology before dumping cpu topology - */ - shared_proc_topology_init(); dump_numa_cpu_topology(); #ifdef CONFIG_SCHED_SMT diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index e2a46cfed5fd1b..c477b8585a297a 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -57,7 +57,7 @@ void save_stack_trace(struct stack_trace *trace) { unsigned long sp; - sp = current_stack_pointer(); + sp = current_stack_frame(); save_context_stack(trace, sp, current, 1); } @@ -71,7 +71,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) return; if (tsk == current) - sp = current_stack_pointer(); + sp = current_stack_frame(); else sp = tsk->thread.ksp; @@ -131,7 +131,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, } if (tsk == current) - sp = current_stack_pointer(); + sp = current_stack_frame(); else sp = tsk->thread.ksp; diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c new file mode 100644 index 00000000000000..cf06eb443a8055 --- /dev/null +++ b/arch/powerpc/kernel/syscall_64.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef long (*syscall_fn)(long, long, long, long, long, long); + +/* Has to run notrace because it is entered not completely "reconciled" */ +notrace long system_call_exception(long r3, long r4, long r5, + long r6, long r7, long r8, + unsigned long r0, struct pt_regs *regs) +{ + unsigned long ti_flags; + syscall_fn f; + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + + trace_hardirqs_off(); /* finish reconciling */ + + if (IS_ENABLED(CONFIG_PPC_BOOK3S)) + BUG_ON(!(regs->msr & MSR_RI)); + BUG_ON(!(regs->msr & MSR_PR)); + BUG_ON(!FULL_REGS(regs)); + BUG_ON(regs->softe != IRQS_ENABLED); + + account_cpu_user_entry(); + +#ifdef CONFIG_PPC_SPLPAR + if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && + firmware_has_feature(FW_FEATURE_SPLPAR)) { + struct lppaca *lp = local_paca->lppaca_ptr; + + if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx))) + accumulate_stolen_time(); + } +#endif + + kuap_check_amr(); + + /* + * This is not required for the syscall exit path, but makes the + * stack frame look nicer. If this was initialised in the first stack + * frame, or if the unwinder was taught the first stack frame always + * returns to user with IRQS_ENABLED, this store could be avoided! + */ + regs->softe = IRQS_ENABLED; + + local_irq_enable(); + + ti_flags = current_thread_info()->flags; + if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { + /* + * We use the return value of do_syscall_trace_enter() as the + * syscall number. If the syscall was rejected for any reason + * do_syscall_trace_enter() returns an invalid syscall number + * and the test against NR_syscalls will fail and the return + * value to be used is in regs->gpr[3]. + */ + r0 = do_syscall_trace_enter(regs); + if (unlikely(r0 >= NR_syscalls)) + return regs->gpr[3]; + r3 = regs->gpr[3]; + r4 = regs->gpr[4]; + r5 = regs->gpr[5]; + r6 = regs->gpr[6]; + r7 = regs->gpr[7]; + r8 = regs->gpr[8]; + + } else if (unlikely(r0 >= NR_syscalls)) { + return -ENOSYS; + } + + /* May be faster to do array_index_nospec? */ + barrier_nospec(); + + if (unlikely(ti_flags & _TIF_32BIT)) { + f = (void *)compat_sys_call_table[r0]; + + r3 &= 0x00000000ffffffffULL; + r4 &= 0x00000000ffffffffULL; + r5 &= 0x00000000ffffffffULL; + r6 &= 0x00000000ffffffffULL; + r7 &= 0x00000000ffffffffULL; + r8 &= 0x00000000ffffffffULL; + + } else { + f = (void *)sys_call_table[r0]; + } + + return f(r3, r4, r5, r6, r7, r8); +} + +/* + * This should be called after a syscall returns, with r3 the return value + * from the syscall. If this function returns non-zero, the system call + * exit assembly should additionally load all GPR registers and CTR and XER + * from the interrupt frame. + * + * The function graph tracer can not trace the return side of this function, + * because RI=0 and soft mask state is "unreconciled", so it is marked notrace. + */ +notrace unsigned long syscall_exit_prepare(unsigned long r3, + struct pt_regs *regs) +{ + unsigned long *ti_flagsp = ¤t_thread_info()->flags; + unsigned long ti_flags; + unsigned long ret = 0; + + regs->result = r3; + + /* Check whether the syscall is issued inside a restartable sequence */ + rseq_syscall(regs); + + ti_flags = *ti_flagsp; + + if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) { + if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { + r3 = -r3; + regs->ccr |= 0x10000000; /* Set SO bit in CR */ + } + } + + if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) { + if (ti_flags & _TIF_RESTOREALL) + ret = _TIF_RESTOREALL; + else + regs->gpr[3] = r3; + clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp); + } else { + regs->gpr[3] = r3; + } + + if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { + do_syscall_trace_leave(regs); + ret |= _TIF_RESTOREALL; + } + +again: + local_irq_disable(); + ti_flags = READ_ONCE(*ti_flagsp); + while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { + local_irq_enable(); + if (ti_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + /* + * SIGPENDING must restore signal handler function + * argument GPRs, and some non-volatiles (e.g., r1). + * Restore all for now. This could be made lighter. + */ + if (ti_flags & _TIF_SIGPENDING) + ret |= _TIF_RESTOREALL; + do_notify_resume(regs, ti_flags); + } + local_irq_disable(); + ti_flags = READ_ONCE(*ti_flagsp); + } + + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + unlikely((ti_flags & _TIF_RESTORE_TM))) { + restore_tm_state(regs); + } else { + unsigned long mathflags = MSR_FP; + + if (cpu_has_feature(CPU_FTR_VSX)) + mathflags |= MSR_VEC | MSR_VSX; + else if (cpu_has_feature(CPU_FTR_ALTIVEC)) + mathflags |= MSR_VEC; + + if ((regs->msr & mathflags) != mathflags) + restore_math(regs); + } + } + + /* This must be done with RI=1 because tracing may touch vmaps */ + trace_hardirqs_on(); + + /* This pattern matches prep_irq_for_idle */ + __hard_EE_RI_disable(); + if (unlikely(lazy_irq_pending())) { + __hard_RI_enable(); + trace_hardirqs_off(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + local_irq_enable(); + /* Took an interrupt, may have more exit work to do. */ + goto again; + } + local_paca->irq_happened = 0; + irq_soft_mask_set(IRQS_ENABLED); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + local_paca->tm_scratch = regs->msr; +#endif + + kuap_check_amr(); + + account_cpu_user_exit(); + + return ret; +} + +#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */ +notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) +{ +#ifdef CONFIG_PPC_BOOK3E + struct thread_struct *ts = ¤t->thread; +#endif + unsigned long *ti_flagsp = ¤t_thread_info()->flags; + unsigned long ti_flags; + unsigned long flags; + unsigned long ret = 0; + + if (IS_ENABLED(CONFIG_PPC_BOOK3S)) + BUG_ON(!(regs->msr & MSR_RI)); + BUG_ON(!(regs->msr & MSR_PR)); + BUG_ON(!FULL_REGS(regs)); + BUG_ON(regs->softe != IRQS_ENABLED); + + local_irq_save(flags); + +again: + ti_flags = READ_ONCE(*ti_flagsp); + while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { + local_irq_enable(); /* returning to user: may enable */ + if (ti_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + if (ti_flags & _TIF_SIGPENDING) + ret |= _TIF_RESTOREALL; + do_notify_resume(regs, ti_flags); + } + local_irq_disable(); + ti_flags = READ_ONCE(*ti_flagsp); + } + + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + unlikely((ti_flags & _TIF_RESTORE_TM))) { + restore_tm_state(regs); + } else { + unsigned long mathflags = MSR_FP; + + if (cpu_has_feature(CPU_FTR_VSX)) + mathflags |= MSR_VEC | MSR_VSX; + else if (cpu_has_feature(CPU_FTR_ALTIVEC)) + mathflags |= MSR_VEC; + + if ((regs->msr & mathflags) != mathflags) + restore_math(regs); + } + } + + trace_hardirqs_on(); + __hard_EE_RI_disable(); + if (unlikely(lazy_irq_pending())) { + __hard_RI_enable(); + trace_hardirqs_off(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + local_irq_enable(); + local_irq_disable(); + /* Took an interrupt, may have more exit work to do. */ + goto again; + } + local_paca->irq_happened = 0; + irq_soft_mask_set(IRQS_ENABLED); + +#ifdef CONFIG_PPC_BOOK3E + if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) { + /* + * Check to see if the dbcr0 register is set up to debug. + * Use the internal debug mode bit to do this. + */ + mtmsr(mfmsr() & ~MSR_DE); + mtspr(SPRN_DBCR0, ts->debug.dbcr0); + mtspr(SPRN_DBSR, -1); + } +#endif + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + local_paca->tm_scratch = regs->msr; +#endif + + kuap_check_amr(); + + account_cpu_user_exit(); + + return ret; +} + +void unrecoverable_exception(struct pt_regs *regs); +void preempt_schedule_irq(void); + +notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr) +{ + unsigned long *ti_flagsp = ¤t_thread_info()->flags; + unsigned long flags; + unsigned long ret = 0; + + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI))) + unrecoverable_exception(regs); + BUG_ON(regs->msr & MSR_PR); + BUG_ON(!FULL_REGS(regs)); + + if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) { + clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp); + ret = 1; + } + + local_irq_save(flags); + + if (regs->softe == IRQS_ENABLED) { + /* Returning to a kernel context with local irqs enabled. */ + WARN_ON_ONCE(!(regs->msr & MSR_EE)); +again: + if (IS_ENABLED(CONFIG_PREEMPT)) { + /* Return to preemptible kernel context */ + if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) { + if (preempt_count() == 0) + preempt_schedule_irq(); + } + } + + trace_hardirqs_on(); + __hard_EE_RI_disable(); + if (unlikely(lazy_irq_pending())) { + __hard_RI_enable(); + irq_soft_mask_set(IRQS_ALL_DISABLED); + trace_hardirqs_off(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + /* + * Can't local_irq_restore to replay if we were in + * interrupt context. Must replay directly. + */ + if (irqs_disabled_flags(flags)) { + replay_soft_interrupts(); + } else { + local_irq_restore(flags); + local_irq_save(flags); + } + /* Took an interrupt, may have more exit work to do. */ + goto again; + } + local_paca->irq_happened = 0; + irq_soft_mask_set(IRQS_ENABLED); + } else { + /* Returning to a kernel context with local irqs disabled. */ + __hard_EE_RI_disable(); + if (regs->msr & MSR_EE) + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + } + + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + local_paca->tm_scratch = regs->msr; +#endif + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ + kuap_restore_amr(regs); + + return ret; +} +#endif diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 35b61bfc1b1ae9..220ae11555f2e1 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -9,7 +9,9 @@ # 0 nospu restart_syscall sys_restart_syscall 1 nospu exit sys_exit -2 nospu fork ppc_fork +2 32 fork ppc_fork sys_fork +2 64 fork sys_fork +2 spu fork sys_ni_syscall 3 common read sys_read 4 common write sys_write 5 common open sys_open compat_sys_open @@ -158,7 +160,9 @@ 119 32 sigreturn sys_sigreturn compat_sys_sigreturn 119 64 sigreturn sys_ni_syscall 119 spu sigreturn sys_ni_syscall -120 nospu clone ppc_clone +120 32 clone ppc_clone sys_clone +120 64 clone sys_clone +120 spu clone sys_ni_syscall 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall @@ -240,7 +244,9 @@ 186 spu sendfile sys_sendfile64 187 common getpmsg sys_ni_syscall 188 common putpmsg sys_ni_syscall -189 nospu vfork ppc_vfork +189 32 vfork ppc_vfork sys_vfork +189 64 vfork sys_vfork +189 spu vfork sys_ni_syscall 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 common readahead sys_readahead compat_sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 @@ -316,8 +322,8 @@ 248 32 clock_nanosleep sys_clock_nanosleep_time32 248 64 clock_nanosleep sys_clock_nanosleep 248 spu clock_nanosleep sys_clock_nanosleep -249 32 swapcontext ppc_swapcontext ppc32_swapcontext -249 64 swapcontext ppc64_swapcontext +249 32 swapcontext ppc_swapcontext compat_sys_swapcontext +249 64 swapcontext sys_swapcontext 249 spu swapcontext sys_ni_syscall 250 common tgkill sys_tgkill 251 32 utimes sys_utimes_time32 @@ -456,7 +462,7 @@ 361 common bpf sys_bpf 362 nospu execveat sys_execveat compat_sys_execveat 363 32 switch_endian sys_ni_syscall -363 64 switch_endian ppc_switch_endian +363 64 switch_endian sys_switch_endian 363 spu switch_endian sys_ni_syscall 364 common userfaultfd sys_userfaultfd 365 common membarrier sys_membarrier @@ -516,6 +522,8 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -435 nospu clone3 ppc_clone3 +435 32 clone3 ppc_clone3 sys_clone3 +435 64 clone3 sys_clone3 +435 spu clone3 sys_ni_syscall 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 80a676da11cbcb..479c70680b76df 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -87,6 +87,155 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay); #endif /* CONFIG_PPC64 */ +#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \ +static void read_##NAME(void *val) \ +{ \ + *(unsigned long *)val = mfspr(ADDRESS); \ +} \ +static void write_##NAME(void *val) \ +{ \ + EXTRA; \ + mtspr(ADDRESS, *(unsigned long *)val); \ +} + +#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \ +static ssize_t show_##NAME(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ + unsigned long val; \ + smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \ + return sprintf(buf, "%lx\n", val); \ +} \ +static ssize_t __used \ + store_##NAME(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ + unsigned long val; \ + int ret = sscanf(buf, "%lx", &val); \ + if (ret != 1) \ + return -EINVAL; \ + smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \ + return count; \ +} + +#define SYSFS_PMCSETUP(NAME, ADDRESS) \ + __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \ + __SYSFS_SPRSETUP_SHOW_STORE(NAME) +#define SYSFS_SPRSETUP(NAME, ADDRESS) \ + __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \ + __SYSFS_SPRSETUP_SHOW_STORE(NAME) + +#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \ + __SYSFS_SPRSETUP_SHOW_STORE(NAME) + +#ifdef CONFIG_PPC64 + +/* + * This is the system wide DSCR register default value. Any + * change to this default value through the sysfs interface + * will update all per cpu DSCR default values across the + * system stored in their respective PACA structures. + */ +static unsigned long dscr_default; + +/** + * read_dscr() - Fetch the cpu specific DSCR default + * @val: Returned cpu specific DSCR default value + * + * This function returns the per cpu DSCR default value + * for any cpu which is contained in it's PACA structure. + */ +static void read_dscr(void *val) +{ + *(unsigned long *)val = get_paca()->dscr_default; +} + + +/** + * write_dscr() - Update the cpu specific DSCR default + * @val: New cpu specific DSCR default value to update + * + * This function updates the per cpu DSCR default value + * for any cpu which is contained in it's PACA structure. + */ +static void write_dscr(void *val) +{ + get_paca()->dscr_default = *(unsigned long *)val; + if (!current->thread.dscr_inherit) { + current->thread.dscr = *(unsigned long *)val; + mtspr(SPRN_DSCR, *(unsigned long *)val); + } +} + +SYSFS_SPRSETUP_SHOW_STORE(dscr); +static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); + +static void add_write_permission_dev_attr(struct device_attribute *attr) +{ + attr->attr.mode |= 0200; +} + +/** + * show_dscr_default() - Fetch the system wide DSCR default + * @dev: Device structure + * @attr: Device attribute structure + * @buf: Interface buffer + * + * This function returns the system wide DSCR default value. + */ +static ssize_t show_dscr_default(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%lx\n", dscr_default); +} + +/** + * store_dscr_default() - Update the system wide DSCR default + * @dev: Device structure + * @attr: Device attribute structure + * @buf: Interface buffer + * @count: Size of the update + * + * This function updates the system wide DSCR default value. + */ +static ssize_t __used store_dscr_default(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + unsigned long val; + int ret = 0; + + ret = sscanf(buf, "%lx", &val); + if (ret != 1) + return -EINVAL; + dscr_default = val; + + on_each_cpu(write_dscr, &val, 1); + + return count; +} + +static DEVICE_ATTR(dscr_default, 0600, + show_dscr_default, store_dscr_default); + +static void sysfs_create_dscr_default(void) +{ + if (cpu_has_feature(CPU_FTR_DSCR)) { + int err = 0; + int cpu; + + dscr_default = spr_default_dscr; + for_each_possible_cpu(cpu) + paca_ptrs[cpu]->dscr_default = dscr_default; + + err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); + } +} +#endif /* CONFIG_PPC64 */ + #ifdef CONFIG_PPC_FSL_BOOK3E #define MAX_BIT 63 @@ -407,84 +556,35 @@ void ppc_enable_pmcs(void) } EXPORT_SYMBOL(ppc_enable_pmcs); -#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \ -static void read_##NAME(void *val) \ -{ \ - *(unsigned long *)val = mfspr(ADDRESS); \ -} \ -static void write_##NAME(void *val) \ -{ \ - EXTRA; \ - mtspr(ADDRESS, *(unsigned long *)val); \ -} -#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \ -static ssize_t show_##NAME(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) \ -{ \ - struct cpu *cpu = container_of(dev, struct cpu, dev); \ - unsigned long val; \ - smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \ - return sprintf(buf, "%lx\n", val); \ -} \ -static ssize_t __used \ - store_##NAME(struct device *dev, struct device_attribute *attr, \ - const char *buf, size_t count) \ -{ \ - struct cpu *cpu = container_of(dev, struct cpu, dev); \ - unsigned long val; \ - int ret = sscanf(buf, "%lx", &val); \ - if (ret != 1) \ - return -EINVAL; \ - smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \ - return count; \ -} - -#define SYSFS_PMCSETUP(NAME, ADDRESS) \ - __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \ - __SYSFS_SPRSETUP_SHOW_STORE(NAME) -#define SYSFS_SPRSETUP(NAME, ADDRESS) \ - __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \ - __SYSFS_SPRSETUP_SHOW_STORE(NAME) - -#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \ - __SYSFS_SPRSETUP_SHOW_STORE(NAME) /* Let's define all possible registers, we'll only hook up the ones * that are implemented on the current processor */ -#if defined(CONFIG_PPC64) +#ifdef CONFIG_PMU_SYSFS +#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_BOOK3S_32) #define HAS_PPC_PMC_CLASSIC 1 #define HAS_PPC_PMC_IBM 1 -#define HAS_PPC_PMC_PA6T 1 -#elif defined(CONFIG_PPC_BOOK3S_32) -#define HAS_PPC_PMC_CLASSIC 1 -#define HAS_PPC_PMC_IBM 1 -#define HAS_PPC_PMC_G4 1 #endif +#ifdef CONFIG_PPC64 +#define HAS_PPC_PMC_PA6T 1 +#define HAS_PPC_PMC56 1 +#endif -#ifdef HAS_PPC_PMC_CLASSIC -SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0); -SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1); -SYSFS_PMCSETUP(pmc1, SPRN_PMC1); -SYSFS_PMCSETUP(pmc2, SPRN_PMC2); -SYSFS_PMCSETUP(pmc3, SPRN_PMC3); -SYSFS_PMCSETUP(pmc4, SPRN_PMC4); -SYSFS_PMCSETUP(pmc5, SPRN_PMC5); -SYSFS_PMCSETUP(pmc6, SPRN_PMC6); - -#ifdef HAS_PPC_PMC_G4 -SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2); +#ifdef CONFIG_PPC_BOOK3S_32 +#define HAS_PPC_PMC_G4 1 #endif +#endif /* CONFIG_PMU_SYSFS */ +#if defined(CONFIG_PPC64) && defined(CONFIG_DEBUG_MISC) +#define HAS_PPC_PA6T +#endif +/* + * SPRs which are not related to PMU. + */ #ifdef CONFIG_PPC64 -SYSFS_PMCSETUP(pmc7, SPRN_PMC7); -SYSFS_PMCSETUP(pmc8, SPRN_PMC8); - -SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); SYSFS_SPRSETUP(purr, SPRN_PURR); SYSFS_SPRSETUP(spurr, SPRN_SPURR); SYSFS_SPRSETUP(pir, SPRN_PIR); @@ -495,115 +595,38 @@ SYSFS_SPRSETUP(tscr, SPRN_TSCR); enable write when needed with a separate function. Lets be conservative and default to pseries. */ -static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static DEVICE_ATTR(spurr, 0400, show_spurr, NULL); static DEVICE_ATTR(purr, 0400, show_purr, store_purr); static DEVICE_ATTR(pir, 0400, show_pir, NULL); static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr); +#endif /* CONFIG_PPC64 */ -/* - * This is the system wide DSCR register default value. Any - * change to this default value through the sysfs interface - * will update all per cpu DSCR default values across the - * system stored in their respective PACA structures. - */ -static unsigned long dscr_default; - -/** - * read_dscr() - Fetch the cpu specific DSCR default - * @val: Returned cpu specific DSCR default value - * - * This function returns the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. - */ -static void read_dscr(void *val) -{ - *(unsigned long *)val = get_paca()->dscr_default; -} - - -/** - * write_dscr() - Update the cpu specific DSCR default - * @val: New cpu specific DSCR default value to update - * - * This function updates the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. - */ -static void write_dscr(void *val) -{ - get_paca()->dscr_default = *(unsigned long *)val; - if (!current->thread.dscr_inherit) { - current->thread.dscr = *(unsigned long *)val; - mtspr(SPRN_DSCR, *(unsigned long *)val); - } -} - -SYSFS_SPRSETUP_SHOW_STORE(dscr); -static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); - -static void add_write_permission_dev_attr(struct device_attribute *attr) -{ - attr->attr.mode |= 0200; -} - -/** - * show_dscr_default() - Fetch the system wide DSCR default - * @dev: Device structure - * @attr: Device attribute structure - * @buf: Interface buffer - * - * This function returns the system wide DSCR default value. - */ -static ssize_t show_dscr_default(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sprintf(buf, "%lx\n", dscr_default); -} - -/** - * store_dscr_default() - Update the system wide DSCR default - * @dev: Device structure - * @attr: Device attribute structure - * @buf: Interface buffer - * @count: Size of the update - * - * This function updates the system wide DSCR default value. - */ -static ssize_t __used store_dscr_default(struct device *dev, - struct device_attribute *attr, const char *buf, - size_t count) -{ - unsigned long val; - int ret = 0; - - ret = sscanf(buf, "%lx", &val); - if (ret != 1) - return -EINVAL; - dscr_default = val; +#ifdef HAS_PPC_PMC_CLASSIC +SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0); +SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1); +SYSFS_PMCSETUP(pmc1, SPRN_PMC1); +SYSFS_PMCSETUP(pmc2, SPRN_PMC2); +SYSFS_PMCSETUP(pmc3, SPRN_PMC3); +SYSFS_PMCSETUP(pmc4, SPRN_PMC4); +SYSFS_PMCSETUP(pmc5, SPRN_PMC5); +SYSFS_PMCSETUP(pmc6, SPRN_PMC6); +#endif - on_each_cpu(write_dscr, &val, 1); +#ifdef HAS_PPC_PMC_G4 +SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2); +#endif - return count; -} +#ifdef HAS_PPC_PMC56 +SYSFS_PMCSETUP(pmc7, SPRN_PMC7); +SYSFS_PMCSETUP(pmc8, SPRN_PMC8); -static DEVICE_ATTR(dscr_default, 0600, - show_dscr_default, store_dscr_default); +SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); -static void sysfs_create_dscr_default(void) -{ - if (cpu_has_feature(CPU_FTR_DSCR)) { - int err = 0; - int cpu; +static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); +#endif /* HAS_PPC_PMC56 */ - dscr_default = spr_default_dscr; - for_each_possible_cpu(cpu) - paca_ptrs[cpu]->dscr_default = dscr_default; - err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); - } -} -#endif /* CONFIG_PPC64 */ #ifdef HAS_PPC_PMC_PA6T SYSFS_PMCSETUP(pa6t_pmc0, SPRN_PA6T_PMC0); @@ -612,7 +635,9 @@ SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2); SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3); SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4); SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5); -#ifdef CONFIG_DEBUG_MISC +#endif + +#ifdef HAS_PPC_PA6T SYSFS_SPRSETUP(hid0, SPRN_HID0); SYSFS_SPRSETUP(hid1, SPRN_HID1); SYSFS_SPRSETUP(hid4, SPRN_HID4); @@ -641,15 +666,14 @@ SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0); SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1); SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2); SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3); -#endif /* CONFIG_DEBUG_MISC */ -#endif /* HAS_PPC_PMC_PA6T */ +#endif /* HAS_PPC_PA6T */ #ifdef HAS_PPC_PMC_IBM static struct device_attribute ibm_common_attrs[] = { __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), }; -#endif /* HAS_PPC_PMC_G4 */ +#endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 static struct device_attribute g4_common_attrs[] = { @@ -659,6 +683,7 @@ static struct device_attribute g4_common_attrs[] = { }; #endif /* HAS_PPC_PMC_G4 */ +#ifdef HAS_PPC_PMC_CLASSIC static struct device_attribute classic_pmc_attrs[] = { __ATTR(pmc1, 0600, show_pmc1, store_pmc1), __ATTR(pmc2, 0600, show_pmc2, store_pmc2), @@ -666,14 +691,16 @@ static struct device_attribute classic_pmc_attrs[] = { __ATTR(pmc4, 0600, show_pmc4, store_pmc4), __ATTR(pmc5, 0600, show_pmc5, store_pmc5), __ATTR(pmc6, 0600, show_pmc6, store_pmc6), -#ifdef CONFIG_PPC64 +#ifdef HAS_PPC_PMC56 __ATTR(pmc7, 0600, show_pmc7, store_pmc7), __ATTR(pmc8, 0600, show_pmc8, store_pmc8), #endif }; +#endif -#ifdef HAS_PPC_PMC_PA6T +#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T) static struct device_attribute pa6t_attrs[] = { +#ifdef HAS_PPC_PMC_PA6T __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), __ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0), @@ -682,7 +709,8 @@ static struct device_attribute pa6t_attrs[] = { __ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3), __ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4), __ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5), -#ifdef CONFIG_DEBUG_MISC +#endif +#ifdef HAS_PPC_PA6T __ATTR(hid0, 0600, show_hid0, store_hid0), __ATTR(hid1, 0600, show_hid1, store_hid1), __ATTR(hid4, 0600, show_hid4, store_hid4), @@ -711,10 +739,9 @@ static struct device_attribute pa6t_attrs[] = { __ATTR(tsr1, 0600, show_tsr1, store_tsr1), __ATTR(tsr2, 0600, show_tsr2, store_tsr2), __ATTR(tsr3, 0600, show_tsr3, store_tsr3), -#endif /* CONFIG_DEBUG_MISC */ +#endif /* HAS_PPC_PA6T */ }; -#endif /* HAS_PPC_PMC_PA6T */ -#endif /* HAS_PPC_PMC_CLASSIC */ +#endif #ifdef CONFIG_PPC_SVM static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char *buf) @@ -765,14 +792,14 @@ static int register_cpu_online(unsigned int cpu) pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ -#ifdef HAS_PPC_PMC_PA6T +#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); pmc_attrs = NULL; break; -#endif /* HAS_PPC_PMC_PA6T */ +#endif default: attrs = NULL; nattrs = 0; @@ -787,8 +814,10 @@ static int register_cpu_online(unsigned int cpu) device_create_file(s, &pmc_attrs[i]); #ifdef CONFIG_PPC64 +#ifdef CONFIG_PMU_SYSFS if (cpu_has_feature(CPU_FTR_MMCRA)) device_create_file(s, &dev_attr_mmcra); +#endif /* CONFIG_PMU_SYSFS */ if (cpu_has_feature(CPU_FTR_PURR)) { if (!firmware_has_feature(FW_FEATURE_LPAR)) @@ -854,14 +883,14 @@ static int unregister_cpu_online(unsigned int cpu) pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ -#ifdef HAS_PPC_PMC_PA6T +#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); pmc_attrs = NULL; break; -#endif /* HAS_PPC_PMC_PA6T */ +#endif default: attrs = NULL; nattrs = 0; @@ -876,8 +905,10 @@ static int unregister_cpu_online(unsigned int cpu) device_remove_file(s, &pmc_attrs[i]); #ifdef CONFIG_PPC64 +#ifdef CONFIG_PMU_SYSFS if (cpu_has_feature(CPU_FTR_MMCRA)) device_remove_file(s, &dev_attr_mmcra); +#endif /* CONFIG_PMU_SYSFS */ if (cpu_has_feature(CPU_FTR_PURR)) device_remove_file(s, &dev_attr_purr); diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 5b905a2f4e4dfc..d34276f3c495fe 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -16,25 +16,22 @@ #ifdef CONFIG_PPC64 .p2align 3 +#define __SYSCALL(nr, entry) .8byte entry +#else +#define __SYSCALL(nr, entry) .long entry #endif .globl sys_call_table sys_call_table: #ifdef CONFIG_PPC64 -#define __SYSCALL(nr, entry) .8byte DOTSYM(entry) #include -#undef __SYSCALL #else -#define __SYSCALL(nr, entry) .long entry #include -#undef __SYSCALL #endif #ifdef CONFIG_COMPAT .globl compat_sys_call_table compat_sys_call_table: #define compat_sys_sigsuspend sys_sigsuspend -#define __SYSCALL(nr, entry) .8byte DOTSYM(entry) #include -#undef __SYSCALL #endif diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1168e8b37e3069..bda9cb4a0a5f67 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -663,15 +663,6 @@ void timer_broadcast_interrupt(void) } #endif -/* - * Hypervisor decrementer interrupts shouldn't occur but are sometimes - * left pending on exit from a KVM guest. We don't need to do anything - * to clear them, as they are edge-triggered. - */ -void hdec_interrupt(struct pt_regs *regs) -{ -} - #ifdef CONFIG_SUSPEND static void generic_suspend_disable_irqs(void) { diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 82a3438300fd6d..3fca22276bb10c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2278,35 +2278,20 @@ void ppc_warn_emulated_print(const char *type) static int __init ppc_warn_emulated_init(void) { - struct dentry *dir, *d; + struct dentry *dir; unsigned int i; struct ppc_emulated_entry *entries = (void *)&ppc_emulated; - if (!powerpc_debugfs_root) - return -ENODEV; - dir = debugfs_create_dir("emulated_instructions", powerpc_debugfs_root); - if (!dir) - return -ENOMEM; - d = debugfs_create_u32("do_warn", 0644, dir, - &ppc_warn_emulated); - if (!d) - goto fail; + debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated); - for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) { - d = debugfs_create_u32(entries[i].name, 0644, dir, - (u32 *)&entries[i].val.counter); - if (!d) - goto fail; - } + for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) + debugfs_create_u32(entries[i].name, 0644, dir, + (u32 *)&entries[i].val.counter); return 0; - -fail: - debugfs_remove_recursive(dir); - return -ENOMEM; } device_initcall(ppc_warn_emulated_init); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index b9a108411c0d01..d3b77c15f9ce22 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -391,12 +391,7 @@ static unsigned long __init find_function64(struct lib64_elfinfo *lib, symname); return 0; } -#ifdef VDS64_HAS_DESCRIPTORS - return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) - - VDSO64_LBASE; -#else return sym->st_value - VDSO64_LBASE; -#endif } static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32, diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 25c14a0981bf2e..d20c5e79e03cde 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -134,7 +134,7 @@ _GLOBAL(load_up_vsx) /* enable use of VSX after return */ oris r12,r12,MSR_VSX@h std r12,_MSR(r1) - b fast_exception_return + b fast_interrupt_return #endif /* CONFIG_VSX */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index b4c89a1acebb83..31a0f201fb6f43 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -256,6 +256,7 @@ SECTIONS *(.dynamic) } .hash : AT(ADDR(.hash) - LOAD_OFFSET) { *(.hash) } + .gnu.hash : AT(ADDR(.gnu.hash) - LOAD_OFFSET) { *(.gnu.hash) } .interp : AT(ADDR(.interp) - LOAD_OFFSET) { *(.interp) } .rela.dyn : AT(ADDR(.rela.dyn) - LOAD_OFFSET) { diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile index 378f6108a41469..86380c69f5ce2a 100644 --- a/arch/powerpc/kexec/Makefile +++ b/arch/powerpc/kexec/Makefile @@ -3,9 +3,6 @@ # Makefile for the linux kernel. # -# Avoid clang warnings around longjmp/setjmp declarations -CFLAGS_crash.o += -ffreestanding - obj-y += core.o crash.o core_$(BITS).o obj-$(CONFIG_PPC32) += relocate_32.o diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 3aecec890d6fcc..6404df613ea36a 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -2133,9 +2133,8 @@ static const struct file_operations debugfs_htab_fops = { void kvmppc_mmu_debugfs_init(struct kvm *kvm) { - kvm->arch.htab_dentry = debugfs_create_file("htab", 0400, - kvm->arch.debugfs_dir, kvm, - &debugfs_htab_fops); + debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm, + &debugfs_htab_fops); } void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 134fbc1f029f39..9f050064d2a2d7 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -1376,9 +1376,8 @@ static const struct file_operations debugfs_radix_fops = { void kvmhv_radix_debugfs_init(struct kvm *kvm) { - kvm->arch.radix_dentry = debugfs_create_file("radix", 0400, - kvm->arch.debugfs_dir, kvm, - &debugfs_radix_fops); + debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm, + &debugfs_radix_fops); } int kvmppc_radix_init(void) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index fa6e4fc7d0e474..93493f0cbfe8e9 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2267,14 +2267,9 @@ static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) struct kvm *kvm = vcpu->kvm; snprintf(buf, sizeof(buf), "vcpu%u", id); - if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir)) - return; vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir); - if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir)) - return; - vcpu->arch.debugfs_timings = - debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, - vcpu, &debugfs_timings_ops); + debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu, + &debugfs_timings_ops); } #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index dbc2fecc37f056..780a499c7114b0 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1266,7 +1266,6 @@ kvmppc_interrupt_hv: * R12 = (guest CR << 32) | interrupt vector * R13 = PACA * guest R12 saved in shadow VCPU SCRATCH0 - * guest CTR saved in shadow VCPU SCRATCH1 if RELOCATABLE * guest R13 saved in SPRN_SCRATCH0 */ std r9, HSTATE_SCRATCH2(r13) @@ -1367,12 +1366,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 11: stw r3,VCPU_HEIR(r9) /* these are volatile across C function calls */ -#ifdef CONFIG_RELOCATABLE - ld r3, HSTATE_SCRATCH1(r13) - mtctr r3 -#else mfctr r3 -#endif mfxer r4 std r3, VCPU_CTR(r9) std r4, VCPU_XER(r9) @@ -3258,7 +3252,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) * r12 is (CR << 32) | vector * r13 points to our PACA * r12 is saved in HSTATE_SCRATCH0(r13) - * ctr is saved in HSTATE_SCRATCH1(r13) if RELOCATABLE * r9 is saved in HSTATE_SCRATCH2(r13) * r13 is saved in HSPRG1 * cfar is saved in HSTATE_CFAR(r13) @@ -3307,11 +3300,7 @@ kvmppc_bad_host_intr: ld r5, HSTATE_CFAR(r13) std r5, ORIG_GPR3(r1) mflr r3 -#ifdef CONFIG_RELOCATABLE - ld r4, HSTATE_SCRATCH1(r13) -#else mfctr r4 -#endif mfxer r5 lbz r6, PACAIRQSOFTMASK(r13) std r3, _LINK(r1) diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 0169bab544dd7f..1f492aa4c8d6c5 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -167,16 +167,9 @@ kvmppc_interrupt_pr: * R12 = (guest CR << 32) | exit handler id * R13 = PACA * HSTATE.SCRATCH0 = guest R12 - * HSTATE.SCRATCH1 = guest CTR if RELOCATABLE */ #ifdef CONFIG_PPC64 /* Match 32-bit entry */ -#ifdef CONFIG_RELOCATABLE - std r9, HSTATE_SCRATCH2(r13) - ld r9, HSTATE_SCRATCH1(r13) - mtctr r9 - ld r9, HSTATE_SCRATCH2(r13) -#endif rotldi r12, r12, 32 /* Flip R12 halves for stw */ stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */ srdi r12, r12, 32 /* shift trap into low half */ diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index bfe4f106cffc7d..ba56a5cbba97f3 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -211,23 +211,14 @@ void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", current->pid, id); - debugfs_file = debugfs_create_file(dbg_fname, 0666, - kvm_debugfs_dir, vcpu, - &kvmppc_exit_timing_fops); - - if (!debugfs_file) { - printk(KERN_ERR"%s: error creating debugfs file %s\n", - __func__, dbg_fname); - return; - } + debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir, + vcpu, &kvmppc_exit_timing_fops); vcpu->arch.debugfs_exit_timing = debugfs_file; } void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) { - if (vcpu->arch.debugfs_exit_timing) { - debugfs_remove(vcpu->arch.debugfs_exit_timing); - vcpu->arch.debugfs_exit_timing = NULL; - } + debugfs_remove(vcpu->arch.debugfs_exit_timing); + vcpu->arch.debugfs_exit_timing = NULL; } diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c077acb983a19f..5f3a7bd9d90db9 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -3179,8 +3179,9 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) * entry code works. If that is changed, this will * need to be changed also. */ - if (regs->gpr[0] == 0x1ebe && - cpu_has_feature(CPU_FTR_REAL_LE)) { + if (IS_ENABLED(CONFIG_PPC_FAST_ENDIAN_SWITCH) && + cpu_has_feature(CPU_FTR_REAL_LE) && + regs->gpr[0] == 0x1ebe) { regs->msr ^= MSR_LE; goto instr_done; } diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 42347067739ca3..53df4146dd325d 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -13,19 +13,20 @@ #include #define IMM_L(i) ((uintptr_t)(i) & 0xffff) +#define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) /* * Defined with TEST_ prefix so it does not conflict with other * definitions. */ #define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) + ___PPC_RA(base) | IMM_DS(i)) #define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ ___PPC_RA(base) | IMM_L(i)) #define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b)) #define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) + ___PPC_RA(base) | IMM_DS(i)) #define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b) | \ __PPC_EH(eh)) @@ -160,7 +161,7 @@ static void __init test_std(void) /* std r5, 0(r3) */ stepped = emulate_step(®s, TEST_STD(5, 3, 0)); - if (stepped == 1 || regs.gpr[5] == a) + if (stepped == 1 && regs.gpr[5] == a) show_result("std", "PASS"); else show_result("std", "FAIL"); diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 2015c4f9623809..6d236080cb1ad0 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -35,7 +35,7 @@ mmu_hash_lock: /* * Load a PTE into the hash table, if possible. * The address is in r4, and r3 contains an access flag: - * _PAGE_RW (0x400) if a write. + * _PAGE_RW (0x002) if a write. * r9 contains the SRR1 value, from which we use the MSR_PR bit. * SPRG_THREAD contains the physical address of the current task's thread. * @@ -69,7 +69,7 @@ _GLOBAL(hash_page) blt+ 112f /* assume user more likely */ lis r5, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ - rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ + rlwimi r3,r9,32-14,31,31 /* MSR_PR -> _PAGE_USER */ 112: #ifndef CONFIG_PTE_64BIT rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ @@ -94,7 +94,7 @@ _GLOBAL(hash_page) #else rlwimi r8,r4,23,20,28 /* compute pte address */ #endif - rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + rlwinm r0,r3,6,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE /* @@ -310,11 +310,9 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) _GLOBAL(create_hpte) /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ - rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */ rlwinm r0,r5,32-6,30,30 /* _PAGE_DIRTY -> PP msb */ - and r8,r8,r0 /* writable if _RW & _DIRTY */ - rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ + and r8,r5,r0 /* writable if _RW & _DIRTY */ + rlwimi r5,r5,1,30,30 /* _PAGE_USER -> PP msb */ ori r8,r8,0xe04 /* clear out reserved bits */ andc r8,r5,r8 /* PP = user? (rw&dirty? 1: 3): 0 */ BEGIN_FTR_SECTION @@ -566,7 +564,7 @@ _GLOBAL(flush_hash_pages) 33: lwarx r8,0,r5 /* fetch the pte flags word */ andi. r0,r8,_PAGE_HASHPTE beq 8f /* done if HASHPTE is already clear */ - rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */ + rlwinm r8,r8,0,~_PAGE_HASHPTE /* clear HASHPTE bit */ stwcx. r8,0,r5 /* update the pte */ bne- 33b @@ -690,18 +688,21 @@ _GLOBAL(_tlbia) bne- 10b stwcx. r8,0,r9 bne- 10b +#endif /* CONFIG_SMP */ + li r5, 32 + lis r4, KERNELBASE@h + mtctr r5 sync - tlbia +0: tlbie r4 + addi r4, r4, 0x1000 + bdnz 0b sync +#ifdef CONFIG_SMP TLBSYNC li r0,0 stw r0,0(r9) /* clear mmu_hash_lock */ mtmsr r10 SYNC_601 isync -#else /* CONFIG_SMP */ - sync - tlbia - sync #endif /* CONFIG_SMP */ blr diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index f888cbb109b913..39ba53ca5bb556 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -312,7 +312,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea) if (!Hash) return; - pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea); + pmd = pmd_ptr(mm, ea); if (!pmd_none(*pmd)) add_hash_page(mm->context.id, ea, pmd_val(*pmd)); } diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index 2fcd321040ff38..dc9039a170aac9 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -79,15 +79,18 @@ static void flush_range(struct mm_struct *mm, unsigned long start, int count; unsigned int ctx = mm->context.id; + start &= PAGE_MASK; if (!Hash) { - _tlbia(); + if (end - start <= PAGE_SIZE) + _tlbie(start); + else + _tlbia(); return; } - start &= PAGE_MASK; if (start >= end) return; end = (end - 1) | ~PAGE_MASK; - pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); + pmd = pmd_ptr(mm, start); for (;;) { pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; if (pmd_end > end) @@ -145,7 +148,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) return; } mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; - pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); + pmd = pmd_ptr(mm, vmaddr); if (!pmd_none(*pmd)) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 523d4d39d11e8c..7e5714a69a5842 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -2018,11 +2018,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n") static int __init hash64_debugfs(void) { - if (!debugfs_create_file_unsafe("hpt_order", 0600, powerpc_debugfs_root, - NULL, &fops_hpt_order)) { - pr_err("lpar: unable to create hpt_order debugsfs file\n"); - } - + debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root, NULL, + &fops_hpt_order); return 0; } machine_device_initcall(pseries, hash64_debugfs); diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index eba73ebd8ae57b..fa05bbd1f6829c 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -121,24 +121,6 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, goto free_exit; } - pageshift = PAGE_SHIFT; - for (i = 0; i < entries; ++i) { - struct page *page = mem->hpages[i]; - - /* - * Allow to use larger than 64k IOMMU pages. Only do that - * if we are backed by hugetlb. - */ - if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) - pageshift = page_shift(compound_head(page)); - mem->pageshift = min(mem->pageshift, pageshift); - /* - * We don't need struct page reference any more, switch - * to physical address. - */ - mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; - } - good_exit: atomic64_set(&mem->mapped, 1); mem->used = 1; @@ -158,6 +140,27 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, } } + if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { + /* + * Allow to use larger than 64k IOMMU pages. Only do that + * if we are backed by hugetlb. Skip device memory as it is not + * backed with page structs. + */ + pageshift = PAGE_SHIFT; + for (i = 0; i < entries; ++i) { + struct page *page = mem->hpages[i]; + + if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) + pageshift = page_shift(compound_head(page)); + mem->pageshift = min(mem->pageshift, pageshift); + /* + * We don't need struct page reference any more, switch + * to physical address. + */ + mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; + } + } + list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); mutex_unlock(&mem_list_mutex); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index dd1bea45325c6e..2a9a0cd7949009 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 03f43c924e0094..758ade2c2b6ed3 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -587,6 +587,11 @@ void radix__local_flush_all_mm(struct mm_struct *mm) preempt_enable(); } EXPORT_SYMBOL(radix__local_flush_all_mm); + +static void __flush_all_mm(struct mm_struct *mm, bool fullmm) +{ + radix__local_flush_all_mm(mm); +} #endif /* CONFIG_SMP */ void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, @@ -777,7 +782,7 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) EXPORT_SYMBOL(radix__flush_tlb_page); #else /* CONFIG_SMP */ -#define radix__flush_all_mm radix__local_flush_all_mm +static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } #endif /* CONFIG_SMP */ static void do_tlbiel_kernel(void *info) diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index d2bed3fcb7194e..cbcad369fcb22a 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -36,7 +36,7 @@ static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned unsigned long k_cur, k_next; pte_t *new = NULL; - pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start); + pmd = pmd_ptr_k(k_start); for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) { k_next = pgd_addr_end(k_cur, k_end); @@ -78,7 +78,7 @@ static int __init kasan_init_region(void *start, size_t size) block = memblock_alloc(k_end - k_start, PAGE_SIZE); for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); + pmd_t *pmd = pmd_ptr_k(k_cur); void *va = block + k_cur - k_start; pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); @@ -101,8 +101,8 @@ static void __init kasan_remap_early_shadow_ro(void) kasan_populate_pte(kasan_early_shadow_pte, prot); - for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); + for (k_cur = k_start & PAGE_MASK; k_cur != k_end; k_cur += PAGE_SIZE) { + pmd_t *pmd = pmd_ptr_k(k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) @@ -196,7 +196,7 @@ void __init kasan_early_init(void) unsigned long addr = KASAN_SHADOW_START; unsigned long end = KASAN_SHADOW_END; unsigned long next; - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(addr), addr), addr); + pmd_t *pmd = pmd_ptr_k(addr); BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1c07d5a3f54318..9b4f5fb719e0f2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -66,12 +66,6 @@ pte_t *kmap_pte; EXPORT_SYMBOL(kmap_pte); pgprot_t kmap_prot; EXPORT_SYMBOL(kmap_prot); - -static inline pte_t *virt_to_kpte(unsigned long vaddr) -{ - return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), - vaddr), vaddr), vaddr); -} #endif pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c index f348104eb461d9..82862723ab42fd 100644 --- a/arch/powerpc/mm/nohash/40x.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -104,7 +104,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) pmd_t *pmdp; unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); + pmdp = pmd_ptr_k(v); *pmdp++ = __pmd(val); *pmdp++ = __pmd(val); *pmdp++ = __pmd(val); @@ -119,7 +119,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) pmd_t *pmdp; unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE; - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); + pmdp = pmd_ptr_k(v); *pmdp = __pmd(val); v += LARGE_PAGE_SIZE_4M; diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index 2ca407cedbe79b..eaeee402f96e85 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -397,7 +397,7 @@ _GLOBAL(set_context) * extern void loadcam_entry(unsigned int index) * * Load TLBCAM[index] entry in to the L2 CAM MMU - * Must preserve r7, r8, r9, and r10 + * Must preserve r7, r8, r9, r10 and r11 */ _GLOBAL(loadcam_entry) mflr r5 @@ -433,6 +433,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) */ _GLOBAL(loadcam_multi) mflr r8 + /* Don't switch to AS=1 if already there */ + mfmsr r11 + andi. r11,r11,MSR_IS + bne 10f /* * Set up temporary TLB entry that is the same as what we're @@ -458,6 +462,7 @@ _GLOBAL(loadcam_multi) mtmsr r6 isync +10: mr r9,r3 add r10,r3,r4 2: bl loadcam_entry @@ -466,6 +471,10 @@ _GLOBAL(loadcam_multi) mr r3,r9 blt 2b + /* Don't return to AS=0 if we were in AS=1 at function start */ + andi. r11,r11,MSR_IS + bne 3f + /* Return to AS=0 and clear the temporary entry */ mfmsr r6 rlwinm. r6,r6,0,~(MSR_IS|MSR_DS) @@ -481,6 +490,7 @@ _GLOBAL(loadcam_multi) tlbwe isync +3: mtlr r8 blr #endif diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3c7dec70cda093..9fcf2d19583004 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -461,25 +461,69 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb) return nid; } +#ifdef CONFIG_PPC_SPLPAR +static int vphn_get_nid(long lcpu) +{ + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; + long rc, hwid; + + /* + * On a shared lpar, device tree will not have node associativity. + * At this time lppaca, or its __old_status field may not be + * updated. Hence kernel cannot detect if its on a shared lpar. So + * request an explicit associativity irrespective of whether the + * lpar is shared or dedicated. Use the device tree property as a + * fallback. cpu_to_phys_id is only valid between + * smp_setup_cpu_maps() and smp_setup_pacas(). + */ + if (firmware_has_feature(FW_FEATURE_VPHN)) { + if (cpu_to_phys_id) + hwid = cpu_to_phys_id[lcpu]; + else + hwid = get_hard_smp_processor_id(lcpu); + + rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity); + if (rc == H_SUCCESS) + return associativity_to_nid(associativity); + } + + return NUMA_NO_NODE; +} +#else +static int vphn_get_nid(long unused) +{ + return NUMA_NO_NODE; +} +#endif /* CONFIG_PPC_SPLPAR */ + /* * Figure out to which domain a cpu belongs and stick it there. * Return the id of the domain used. */ static int numa_setup_cpu(unsigned long lcpu) { - int nid = NUMA_NO_NODE; struct device_node *cpu; + int fcpu = cpu_first_thread_sibling(lcpu); + int nid = NUMA_NO_NODE; /* * If a valid cpu-to-node mapping is already available, use it * directly instead of querying the firmware, since it represents * the most recent mapping notified to us by the platform (eg: VPHN). + * Since cpu_to_node binding remains the same for all threads in the + * core. If a valid cpu-to-node mapping is already available, for + * the first thread in the core, use it. */ - if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) { + nid = numa_cpu_lookup_table[fcpu]; + if (nid >= 0) { map_cpu_to_node(lcpu, nid); return nid; } + nid = vphn_get_nid(lcpu); + if (nid != NUMA_NO_NODE) + goto out_present; + cpu = of_get_cpu_node(lcpu, NULL); if (!cpu) { @@ -491,13 +535,26 @@ static int numa_setup_cpu(unsigned long lcpu) } nid = of_node_to_nid_single(cpu); + of_node_put(cpu); out_present: if (nid < 0 || !node_possible(nid)) nid = first_online_node; + /* + * Update for the first thread of the core. All threads of a core + * have to be part of the same node. This not only avoids querying + * for every other thread in the core, but always avoids a case + * where virtual node associativity change causes subsequent threads + * of a core to be associated with different nid. However if first + * thread is already online, expect it to have a valid mapping. + */ + if (fcpu != lcpu) { + WARN_ON(cpu_online(fcpu)); + map_cpu_to_node(fcpu, nid); + } + map_cpu_to_node(lcpu, nid); - of_node_put(cpu); out: return nid; } @@ -1191,23 +1248,30 @@ static long vphn_get_associativity(unsigned long cpu, VPHN_FLAG_VCPU, associativity); switch (rc) { + case H_SUCCESS: + dbg("VPHN hcall succeeded. Reset polling...\n"); + timed_topology_update(0); + goto out; + case H_FUNCTION: - printk_once(KERN_INFO - "VPHN is not supported. Disabling polling...\n"); - stop_topology_update(); + pr_err_ratelimited("VPHN unsupported. Disabling polling...\n"); break; case H_HARDWARE: - printk(KERN_ERR - "hcall_vphn() experienced a hardware fault " + pr_err_ratelimited("hcall_vphn() experienced a hardware fault " "preventing VPHN. Disabling polling...\n"); - stop_topology_update(); break; - case H_SUCCESS: - dbg("VPHN hcall succeeded. Reset polling...\n"); - timed_topology_update(0); + case H_PARAMETER: + pr_err_ratelimited("hcall_vphn() was passed an invalid parameter. " + "Disabling polling...\n"); + break; + default: + pr_err_ratelimited("hcall_vphn() returned %ld. Disabling polling...\n" + , rc); break; } + stop_topology_update(); +out: return rc; } @@ -1568,15 +1632,6 @@ int prrn_is_enabled(void) return prrn_enabled; } -void __init shared_proc_topology_init(void) -{ - if (lppaca_shared_proc(get_lppaca())) { - bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask), - nr_cpumask_bits); - numa_update_cpu_topology(false); - } -} - static int topology_read(struct seq_file *file, void *v) { if (vphn_enabled || prrn_enabled) diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 5fb90edd865e67..f62de06e3d07c9 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -63,7 +63,7 @@ int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) int err = -ENOMEM; /* Use upper 10 bits of VA to index the first level map */ - pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va); + pd = pmd_ptr_k(va); /* Use middle 10 bits of VA to index the second-level map */ if (likely(slab_is_available())) pg = pte_alloc_kernel(pd, va); @@ -121,44 +121,9 @@ void __init mapin_ram(void) } } -/* Scan the real Linux page tables and return a PTE pointer for - * a virtual address in a context. - * Returns true (1) if PTE was found, zero otherwise. The pointer to - * the PTE pointer is unmodified if PTE is not found. - */ -static int -get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int retval = 0; - - pgd = pgd_offset(mm, addr & PAGE_MASK); - if (pgd) { - pud = pud_offset(pgd, addr & PAGE_MASK); - if (pud && pud_present(*pud)) { - pmd = pmd_offset(pud, addr & PAGE_MASK); - if (pmd_present(*pmd)) { - pte = pte_offset_map(pmd, addr & PAGE_MASK); - if (pte) { - retval = 1; - *ptep = pte; - if (pmdp) - *pmdp = pmd; - /* XXX caller needs to do pte_unmap, yuck */ - } - } - } - } - return(retval); -} - static int __change_page_attr_noflush(struct page *page, pgprot_t prot) { pte_t *kpte; - pmd_t *kpmd; unsigned long address; BUG_ON(PageHighMem(page)); @@ -166,10 +131,10 @@ static int __change_page_attr_noflush(struct page *page, pgprot_t prot) if (v_block_mapped(address)) return 0; - if (!get_pteptr(&init_mm, address, &kpte, &kpmd)) + kpte = virt_to_kpte(address); + if (!kpte) return -EINVAL; __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0); - pte_unmap(kpte); return 0; } diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c index 4154feac1da340..d3a5d6b318d186 100644 --- a/arch/powerpc/mm/ptdump/bats.c +++ b/arch/powerpc/mm/ptdump/bats.c @@ -164,10 +164,8 @@ static const struct file_operations bats_fops = { static int __init bats_init(void) { - struct dentry *debugfs_file; - - debugfs_file = debugfs_create_file("block_address_translation", 0400, - powerpc_debugfs_root, NULL, &bats_fops); - return debugfs_file ? 0 : -ENOMEM; + debugfs_create_file("block_address_translation", 0400, + powerpc_debugfs_root, NULL, &bats_fops); + return 0; } device_initcall(bats_init); diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index a07278027c6f4a..b6ed9578382ff8 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -527,13 +527,10 @@ static const struct file_operations ptdump_fops = { static int ptdump_init(void) { - struct dentry *debugfs_file; - if (!radix_enabled()) { populate_markers(); - debugfs_file = debugfs_create_file("kernel_hash_pagetable", - 0400, NULL, NULL, &ptdump_fops); - return debugfs_file ? 0 : -ENOMEM; + debugfs_create_file("kernel_hash_pagetable", 0400, NULL, NULL, + &ptdump_fops); } return 0; } diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 20615625524770..d92bb8ea229c53 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -417,12 +417,10 @@ void ptdump_check_wx(void) static int ptdump_init(void) { - struct dentry *debugfs_file; - populate_markers(); build_pgtable_complete_mask(); - debugfs_file = debugfs_create_file("kernel_page_tables", 0400, NULL, - NULL, &ptdump_fops); - return debugfs_file ? 0 : -ENOMEM; + debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, + &ptdump_fops); + return 0; } device_initcall(ptdump_init); diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c index 501843664bb91b..dde2fe8de4b2a8 100644 --- a/arch/powerpc/mm/ptdump/segment_regs.c +++ b/arch/powerpc/mm/ptdump/segment_regs.c @@ -55,10 +55,8 @@ static const struct file_operations sr_fops = { static int __init sr_init(void) { - struct dentry *debugfs_file; - - debugfs_file = debugfs_create_file("segment_registers", 0400, - powerpc_debugfs_root, NULL, &sr_fops); - return debugfs_file ? 0 : -ENOMEM; + debugfs_create_file("segment_registers", 0400, powerpc_debugfs_root, + NULL, &sr_fops); + return 0; } device_initcall(sr_init); diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 6620b64e496320..665f18e37efb29 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -43,9 +43,6 @@ static int __init warp_probe(void) if (!of_machine_is_compatible("pika,warp")) return 0; - /* For arch_dma_alloc */ - ISA_DMA_THRESHOLD = ~0L; - return 1; } diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 61538869e88a27..4514a6f7458a27 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -205,7 +205,6 @@ static int __init efika_probe(void) if (strcmp(model, "EFIKA5K2")) return 0; - ISA_DMA_THRESHOLD = ~0L; DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index ada42f03915acc..bcdc2c203ec939 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -53,17 +53,19 @@ static void quirk_mpc8360e_qe_enet10(void) np_par = of_find_node_by_name(NULL, "par_io"); if (np_par == NULL) { - pr_warn("%s couldn;t find par_io node\n", __func__); + pr_warn("%s couldn't find par_io node\n", __func__); return; } /* Map Parallel I/O ports registers */ ret = of_address_to_resource(np_par, 0, &res); if (ret) { - pr_warn("%s couldn;t map par_io registers\n", __func__); - return; + pr_warn("%s couldn't map par_io registers\n", __func__); + goto out; } base = ioremap(res.start, resource_size(&res)); + if (!base) + goto out; /* * set output delay adjustments to default values according @@ -111,6 +113,7 @@ static void quirk_mpc8360e_qe_enet10(void) setbits32((base + 0xac), 0x0000c000); } iounmap(base); +out: of_node_put(np_par); } diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 6b1436abe9b14b..915ab6710b93e6 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -218,12 +218,6 @@ static irqreturn_t mpc85xx_8259_cascade_action(int irq, void *dev_id) { return IRQ_HANDLED; } - -static struct irqaction mpc85xxcds_8259_irqaction = { - .handler = mpc85xx_8259_cascade_action, - .flags = IRQF_SHARED | IRQF_NO_THREAD, - .name = "8259 cascade", -}; #endif /* PPC_I8259 */ #endif /* CONFIG_PCI */ @@ -271,7 +265,10 @@ static int mpc85xx_cds_8259_attach(void) * disabled when the last user of the shared IRQ line frees their * interrupt. */ - if ((ret = setup_irq(cascade_irq, &mpc85xxcds_8259_irqaction))) { + ret = request_irq(cascade_irq, mpc85xx_8259_cascade_action, + IRQF_SHARED | IRQF_NO_THREAD, "8259 cascade", + cascade_node); + if (ret) { printk(KERN_ERR "Failed to setup cascade interrupt\n"); return ret; } diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index a43ee7d1ff856a..4db4ca2e1222f8 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -120,12 +120,6 @@ static irqreturn_t cpm_error_interrupt(int irq, void *dev) return IRQ_HANDLED; } -static struct irqaction cpm_error_irqaction = { - .handler = cpm_error_interrupt, - .flags = IRQF_NO_THREAD, - .name = "error", -}; - static const struct irq_domain_ops cpm_pic_host_ops = { .map = cpm_pic_host_map, }; @@ -187,7 +181,8 @@ unsigned int __init cpm_pic_init(void) if (!eirq) goto end; - if (setup_irq(eirq, &cpm_error_irqaction)) + if (request_irq(eirq, cpm_error_interrupt, IRQF_NO_THREAD, "error", + NULL)) printk(KERN_ERR "Could not allocate CPM error IRQ!"); setbits32(&cpic_reg->cpic_cicr, CICR_IEN); diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index f1c805c8adbcd3..df4d57d07f9a04 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -39,12 +39,6 @@ static irqreturn_t timebase_interrupt(int irq, void *dev) return IRQ_HANDLED; } -static struct irqaction tbint_irqaction = { - .handler = timebase_interrupt, - .flags = IRQF_NO_THREAD, - .name = "tbint", -}; - /* per-board overridable init_internal_rtc() function. */ void __init __attribute__ ((weak)) init_internal_rtc(void) @@ -157,7 +151,8 @@ void __init mpc8xx_calibrate_decr(void) (TBSCR_TBF | TBSCR_TBE)); immr_unmap(sys_tmr2); - if (setup_irq(virq, &tbint_irqaction)) + if (request_irq(virq, timebase_interrupt, IRQF_NO_THREAD, "tbint", + NULL)) panic("Could not allocate timer IRQ!"); } diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 6caedc88474f98..0c3c1902135c4f 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -397,7 +397,7 @@ config PPC_KUAP config PPC_KUAP_DEBUG bool "Extra debugging for Kernel Userspace Access Protection" - depends on PPC_HAVE_KUAP && (PPC_RADIX_MMU || PPC_32) + depends on PPC_KUAP && (PPC_RADIX_MMU || PPC_32) help Add extra debugging for Kernel Userspace Access Protection (KUAP) If you're unsure, say N. @@ -425,6 +425,12 @@ config PPC_MM_SLICES config PPC_HAVE_PMU_SUPPORT bool +config PMU_SYSFS + bool "Create PMU SPRs sysfs file" + default n + help + This option enables sysfs file creation for PMU SPRs like MMCR* and PMC*. + config PPC_PERF_CTRS def_bool y depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index ea5e45e326837f..f5d0bf9997596e 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -146,7 +146,6 @@ static int __init amigaone_probe(void) */ cur_cpu_spec->cpu_features &= ~CPU_FTR_NEED_COHERENT; - ISA_DMA_THRESHOLD = 0x00ffffff; DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 57c4e0e86c8849..ca2555b8a0c2aa 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -480,10 +480,6 @@ void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic) snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn)); - if (!debugfs_create_file(name, 0600, powerpc_debugfs_root, - msic, &fops_msic)) { - pr_devel("axon_msi: debugfs_create_file failed!\n"); - return; - } + debugfs_create_file(name, 0600, powerpc_debugfs_root, msic, &fops_msic); } #endif /* DEBUG */ diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 5c3f5d088c3b63..d56b4e3241cd4a 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -177,7 +177,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == MFC_CNTL_SUSPEND_COMPLETE); - /* fall through */ + fallthrough; case MFC_CNTL_SUSPEND_COMPLETE: if (csa) csa->priv2.mfc_control_RW = diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index fcf6f2342ef4c2..65a7e01a8f7d20 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -451,13 +451,6 @@ static void __init chrp_find_openpic(void) of_node_put(np); } -#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_XMON) -static struct irqaction xmon_irqaction = { - .handler = xmon_irq, - .name = "XMON break", -}; -#endif - static void __init chrp_find_8259(void) { struct device_node *np, *pic = NULL; @@ -541,8 +534,11 @@ static void __init chrp_init_IRQ(void) if (of_node_is_type(kbd->parent, "adb")) break; of_node_put(kbd); - if (kbd) - setup_irq(HYDRA_INT_ADB_NMI, &xmon_irqaction); + if (kbd) { + if (request_irq(HYDRA_INT_ADB_NMI, xmon_irq, 0, "XMON break", + NULL)) + pr_err("Failed to register XMON break interrupt\n"); + } #endif } @@ -573,7 +569,6 @@ static int __init chrp_probe(void) if (strcmp(dtype, "chrp")) return 0; - ISA_DMA_THRESHOLD = ~0L; DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 6f019df37916ff..15b2c6eb506d08 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -291,23 +291,6 @@ static int __init maple_probe(void) return 1; } -define_machine(maple) { - .name = "Maple", - .probe = maple_probe, - .setup_arch = maple_setup_arch, - .init_IRQ = maple_init_IRQ, - .pci_irq_fixup = maple_pci_irq_fixup, - .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, - .restart = maple_restart, - .halt = maple_halt, - .get_boot_time = maple_get_boot_time, - .set_rtc_time = maple_set_rtc_time, - .get_rtc_time = maple_get_rtc_time, - .calibrate_decr = generic_calibrate_decr, - .progress = maple_progress, - .power_save = power4_idle, -}; - #ifdef CONFIG_EDAC /* * Register a platform device for CPC925 memory controller on @@ -364,3 +347,20 @@ static int __init maple_cpc925_edac_setup(void) } machine_device_initcall(maple, maple_cpc925_edac_setup); #endif + +define_machine(maple) { + .name = "Maple", + .probe = maple_probe, + .setup_arch = maple_setup_arch, + .init_IRQ = maple_init_IRQ, + .pci_irq_fixup = maple_pci_irq_fixup, + .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, + .restart = maple_restart, + .halt = maple_halt, + .get_boot_time = maple_get_boot_time, + .set_rtc_time = maple_set_rtc_time, + .get_rtc_time = maple_get_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = maple_progress, + .power_save = power4_idle, +}; diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 2e969073473de6..4921bccf0376be 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -250,20 +250,6 @@ static unsigned int pmac_pic_get_irq(void) return irq_linear_revmap(pmac_pic_host, irq); } -#ifdef CONFIG_XMON -static struct irqaction xmon_action = { - .handler = xmon_irq, - .flags = IRQF_NO_THREAD, - .name = "NMI - XMON" -}; -#endif - -static struct irqaction gatwick_cascade_action = { - .handler = gatwick_action, - .flags = IRQF_NO_THREAD, - .name = "cascade", -}; - static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node, enum irq_domain_bus_token bus_token) { @@ -384,12 +370,17 @@ static void __init pmac_pic_probe_oldstyle(void) out_le32(&pmac_irq_hw[i]->enable, 0); /* Hookup cascade irq */ - if (slave && pmac_irq_cascade) - setup_irq(pmac_irq_cascade, &gatwick_cascade_action); + if (slave && pmac_irq_cascade) { + if (request_irq(pmac_irq_cascade, gatwick_action, + IRQF_NO_THREAD, "cascade", NULL)) + pr_err("Failed to register cascade interrupt\n"); + } printk(KERN_INFO "irq: System has %d possible interrupts\n", max_irqs); #ifdef CONFIG_XMON - setup_irq(irq_create_mapping(NULL, 20), &xmon_action); + i = irq_create_mapping(NULL, 20); + if (request_irq(i, xmon_irq, IRQF_NO_THREAD, "NMI - XMON", NULL)) + pr_err("Failed to register NMI-XMON interrupt\n"); #endif } @@ -441,7 +432,9 @@ static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic) nmi_irq = irq_of_parse_and_map(pswitch, 0); if (nmi_irq) { mpic_irq_set_priority(nmi_irq, 9); - setup_irq(nmi_irq, &xmon_action); + if (request_irq(nmi_irq, xmon_irq, IRQF_NO_THREAD, + "NMI - XMON", NULL)) + pr_err("Failed to register NMI-XMON interrupt\n"); } of_node_put(pswitch); } diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index c6d5333729ed77..95fb4feb6cccd0 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -586,7 +586,6 @@ static int __init pmac_probe(void) #ifdef CONFIG_PPC32 /* isa_io_base gets set in pmac_pci_init */ - ISA_DMA_THRESHOLD = ~0L; DMA_MODE_READ = 1; DMA_MODE_WRITE = 2; #endif /* CONFIG_PPC32 */ diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index f95fbdee6efe93..be2ab5b11e57ed 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -399,21 +399,19 @@ static int __init smp_psurge_kick_cpu(int nr) return 0; } -static struct irqaction psurge_irqaction = { - .handler = psurge_ipi_intr, - .flags = IRQF_PERCPU | IRQF_NO_THREAD, - .name = "primary IPI", -}; - static void __init smp_psurge_setup_cpu(int cpu_nr) { + unsigned long flags = IRQF_PERCPU | IRQF_NO_THREAD; + int irq; + if (cpu_nr != 0 || !psurge_start) return; /* reset the entry point so if we get another intr we won't * try to startup again */ out_be32(psurge_start, 0x100); - if (setup_irq(irq_create_mapping(NULL, 30), &psurge_irqaction)) + irq = irq_create_mapping(NULL, 30); + if (request_irq(irq, psurge_ipi_intr, flags, "primary IPI", NULL)) printk(KERN_ERR "Couldn't get primary IPI interrupt"); } @@ -660,13 +658,13 @@ static void smp_core99_gpio_tb_freeze(int freeze) #endif /* !CONFIG_PPC64 */ -/* L2 and L3 cache settings to pass from CPU0 to CPU1 on G4 cpus */ -volatile static long int core99_l2_cache; -volatile static long int core99_l3_cache; - static void core99_init_caches(int cpu) { #ifndef CONFIG_PPC64 + /* L2 and L3 cache settings to pass from CPU0 to CPU1 on G4 cpus */ + static long int core99_l2_cache; + static long int core99_l3_cache; + if (!cpu_has_feature(CPU_FTR_L2CR)) return; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 6f300ab7f0e9b3..79409e005fcde9 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -40,15 +40,8 @@ static int eeh_event_irq = -EINVAL; void pnv_pcibios_bus_add_device(struct pci_dev *pdev) { - struct pci_dn *pdn = pci_get_pdn(pdev); - - if (!pdn || eeh_has_flag(EEH_FORCE_DISABLED)) - return; - dev_dbg(&pdev->dev, "EEH: Setting up device\n"); - eeh_add_device_early(pdn); - eeh_add_device_late(pdev); - eeh_sysfs_add_device(pdev); + eeh_probe_device(pdev); } static int pnv_eeh_init(void) @@ -347,23 +340,13 @@ static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap) /** * pnv_eeh_probe - Do probe on PCI device - * @pdn: PCI device node - * @data: unused + * @pdev: pci_dev to probe * - * When EEH module is installed during system boot, all PCI devices - * are checked one by one to see if it supports EEH. The function - * is introduced for the purpose. By default, EEH has been enabled - * on all PCI devices. That's to say, we only need do necessary - * initialization on the corresponding eeh device and create PE - * accordingly. - * - * It's notable that's unsafe to retrieve the EEH device through - * the corresponding PCI device. During the PCI device hotplug, which - * was possiblly triggered by EEH core, the binding between EEH device - * and the PCI device isn't built yet. + * Create, or find the existing, eeh_dev for this pci_dev. */ -static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) +static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev) { + struct pci_dn *pdn = pci_get_pdn(pdev); struct pci_controller *hose = pdn->phb; struct pnv_phb *phb = hose->private_data; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); @@ -380,6 +363,14 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) if (!edev || edev->pe) return NULL; + /* already configured? */ + if (edev->pdev) { + pr_debug("%s: found existing edev for %04x:%02x:%02x.%01x\n", + __func__, hose->global_number, config_addr >> 8, + PCI_SLOT(config_addr), PCI_FUNC(config_addr)); + return edev; + } + /* Skip for PCI-ISA bridge */ if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) return NULL; @@ -471,7 +462,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) eeh_edev_dbg(edev, "EEH enabled on device\n"); - return NULL; + return edev; } /** diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index eb2e75dac369a4..d6d64f8718e6f3 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -187,11 +187,6 @@ static int memtrace_init_debugfs(void) snprintf(ent->name, 16, "%08x", ent->nid); dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); - if (!dir) { - pr_err("Failed to create debugfs directory for node %d\n", - ent->nid); - return -1; - } ent->dir = dir; debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); @@ -314,8 +309,6 @@ static int memtrace_init(void) { memtrace_debugfs_dir = debugfs_create_dir("memtrace", powerpc_debugfs_root); - if (!memtrace_debugfs_dir) - return -1; debugfs_create_file("enable", 0600, memtrace_debugfs_dir, NULL, &memtrace_init_fops); diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index ed895d82c048de..6dba3b62269fe1 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -71,6 +71,7 @@ static LIST_HEAD(opalcore_list); static struct opalcore_config *oc_conf; static const struct opal_mpipl_fadump *opalc_metadata; static const struct opal_mpipl_fadump *opalc_cpu_metadata; +struct kobject *mpipl_kobj; /* * Set crashing CPU's signal to SIGUSR1. if the kernel is triggered @@ -428,7 +429,7 @@ static void opalcore_cleanup(void) return; /* Remove OPAL core sysfs file */ - sysfs_remove_bin_file(opal_kobj, &opal_core_attr); + sysfs_remove_bin_file(mpipl_kobj, &opal_core_attr); oc_conf->ptload_phdr = NULL; oc_conf->ptload_cnt = 0; @@ -563,9 +564,9 @@ static void __init opalcore_config_init(void) of_node_put(np); } -static ssize_t fadump_release_opalcore_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) +static ssize_t release_core_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) { int input = -1; @@ -589,9 +590,23 @@ static ssize_t fadump_release_opalcore_store(struct kobject *kobj, return count; } -static struct kobj_attribute opalcore_rel_attr = __ATTR(fadump_release_opalcore, - 0200, NULL, - fadump_release_opalcore_store); +static struct kobj_attribute opalcore_rel_attr = __ATTR_WO(release_core); + +static struct attribute *mpipl_attr[] = { + &opalcore_rel_attr.attr, + NULL, +}; + +static struct bin_attribute *mpipl_bin_attr[] = { + &opal_core_attr, + NULL, + +}; + +static struct attribute_group mpipl_group = { + .attrs = mpipl_attr, + .bin_attrs = mpipl_bin_attr, +}; static int __init opalcore_init(void) { @@ -609,7 +624,7 @@ static int __init opalcore_init(void) * then capture the dump. */ if (!(is_opalcore_usable())) { - pr_err("Failed to export /sys/firmware/opal/core\n"); + pr_err("Failed to export /sys/firmware/opal/mpipl/core\n"); opalcore_cleanup(); return rc; } @@ -617,18 +632,28 @@ static int __init opalcore_init(void) /* Set OPAL core file size */ opal_core_attr.size = oc_conf->opalcore_size; + mpipl_kobj = kobject_create_and_add("mpipl", opal_kobj); + if (!mpipl_kobj) { + pr_err("unable to create mpipl kobject\n"); + return -ENOMEM; + } + /* Export OPAL core sysfs file */ - rc = sysfs_create_bin_file(opal_kobj, &opal_core_attr); - if (rc != 0) { - pr_err("Failed to export /sys/firmware/opal/core\n"); + rc = sysfs_create_group(mpipl_kobj, &mpipl_group); + if (rc) { + pr_err("mpipl sysfs group creation failed (%d)", rc); opalcore_cleanup(); return rc; } - - rc = sysfs_create_file(kernel_kobj, &opalcore_rel_attr.attr); + /* The /sys/firmware/opal/core is moved to /sys/firmware/opal/mpipl/ + * directory, need to create symlink at old location to maintain + * backward compatibility. + */ + rc = compat_only_sysfs_link_entry_to_kobj(opal_kobj, mpipl_kobj, + "core", NULL); if (rc) { - pr_warn("unable to create sysfs file fadump_release_opalcore (%d)\n", - rc); + pr_err("unable to create core symlink (%d)\n", rc); + return rc; } return 0; diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 000b350d4060ce..968b9a4d1cd936 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -35,11 +35,10 @@ static int imc_mem_set(void *data, u64 val) } DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n"); -static struct dentry *imc_debugfs_create_x64(const char *name, umode_t mode, - struct dentry *parent, u64 *value) +static void imc_debugfs_create_x64(const char *name, umode_t mode, + struct dentry *parent, u64 *value) { - return debugfs_create_file_unsafe(name, mode, parent, - value, &fops_imc_x64); + debugfs_create_file_unsafe(name, mode, parent, value, &fops_imc_x64); } /* @@ -59,9 +58,6 @@ static void export_imc_mode_and_cmd(struct device_node *node, imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root); - if (!imc_debugfs_parent) - return; - if (of_property_read_u32(node, "cb_offset", &cb_offset)) cb_offset = IMC_CNTL_BLK_OFFSET; @@ -69,21 +65,15 @@ static void export_imc_mode_and_cmd(struct device_node *node, loc = (u64)(ptr->vbase) + cb_offset; imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET); sprintf(mode, "imc_mode_%d", (u32)(ptr->id)); - if (!imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent, - imc_mode_addr)) - goto err; + imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent, + imc_mode_addr); imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET); sprintf(cmd, "imc_cmd_%d", (u32)(ptr->id)); - if (!imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent, - imc_cmd_addr)) - goto err; + imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent, + imc_cmd_addr); ptr++; } - return; - -err: - debugfs_remove_recursive(imc_debugfs_parent); } /* diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 22c22cd7bd82ea..57d3a6af1d52cd 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3174,11 +3174,6 @@ static void pnv_pci_ioda_create_dbgfs(void) sprintf(name, "PCI%04x", hose->global_number); phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); - if (!phb->dbgfs) { - pr_warn("%s: Error on creating debugfs on PHB#%x\n", - __func__, hose->global_number); - continue; - } debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs, phb, &pnv_pci_diag_data_fops); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 11fdae81b5ddc5..3bc188da82ba74 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -229,7 +229,7 @@ static void __noreturn pnv_restart(char *cmd) pnv_prepare_going_down(); do { - if (!cmd) + if (!cmd || !strlen(cmd)) rc = opal_cec_reboot(); else if (strcmp(cmd, "full") == 0) rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL); @@ -237,6 +237,8 @@ static void __noreturn pnv_restart(char *cmd) rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, NULL); else if (strcmp(cmd, "error") == 0) rc = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, NULL); + else if (strcmp(cmd, "fast") == 0) + rc = opal_cec_reboot2(OPAL_REBOOT_FAST, NULL); else rc = OPAL_UNSUPPORTED; diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c index 09e63df53c30a6..44035a3d6414d7 100644 --- a/arch/powerpc/platforms/powernv/vas-debug.c +++ b/arch/powerpc/platforms/powernv/vas-debug.c @@ -115,7 +115,7 @@ void vas_window_free_dbgdir(struct vas_window *window) void vas_window_init_dbgdir(struct vas_window *window) { - struct dentry *f, *d; + struct dentry *d; if (!window->vinst->dbgdir) return; @@ -127,28 +127,10 @@ void vas_window_init_dbgdir(struct vas_window *window) snprintf(window->dbgname, 16, "w%d", window->winid); d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir); - if (IS_ERR(d)) - goto free_name; - window->dbgdir = d; - f = debugfs_create_file("info", 0444, d, window, &info_fops); - if (IS_ERR(f)) - goto remove_dir; - - f = debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops); - if (IS_ERR(f)) - goto remove_dir; - - return; - -remove_dir: - debugfs_remove_recursive(window->dbgdir); - window->dbgdir = NULL; - -free_name: - kfree(window->dbgname); - window->dbgname = NULL; + debugfs_create_file("info", 0444, d, window, &info_fops); + debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops); } void vas_instance_init_dbgdir(struct vas_instance *vinst) @@ -156,8 +138,6 @@ void vas_instance_init_dbgdir(struct vas_instance *vinst) struct dentry *d; vas_init_dbgdir(); - if (!vas_debugfs) - return; vinst->dbgname = kzalloc(16, GFP_KERNEL); if (!vinst->dbgname) @@ -166,16 +146,7 @@ void vas_instance_init_dbgdir(struct vas_instance *vinst) snprintf(vinst->dbgname, 16, "v%d", vinst->vas_id); d = debugfs_create_dir(vinst->dbgname, vas_debugfs); - if (IS_ERR(d)) - goto free_name; - vinst->dbgdir = d; - return; - -free_name: - kfree(vinst->dbgname); - vinst->dbgname = NULL; - vinst->dbgdir = NULL; } /* @@ -191,6 +162,4 @@ void vas_init_dbgdir(void) first_time = false; vas_debugfs = debugfs_create_dir("vas", NULL); - if (IS_ERR(vas_debugfs)) - vas_debugfs = NULL; } diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index a3c74a5cf20d74..c8a2b0b05ac008 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -29,6 +29,4 @@ obj-$(CONFIG_PPC_SPLPAR) += vphn.o obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o -ifdef CONFIG_PPC_PSERIES obj-$(CONFIG_SUSPEND) += suspend.o -endif diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 893ba3f562c4e7..845342814edcd7 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -67,8 +67,7 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index]; } #endif - eeh_add_device_early(pdn); - eeh_add_device_late(pdev); + pseries_eeh_init_edev(pdn); #ifdef CONFIG_PCI_IOV if (pdev->is_virtfn) { struct eeh_dev *edev = pdn_to_eeh_dev(pdn); @@ -78,7 +77,7 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) eeh_add_to_parent_pe(edev); /* Add as VF PE type */ } #endif - eeh_sysfs_add_device(pdev); + eeh_probe_device(pdev); } /* @@ -222,15 +221,16 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap) } /** - * pseries_eeh_probe - EEH probe on the given device + * pseries_eeh_init_edev - initialise the eeh_dev and eeh_pe for a pci_dn + * * @pdn: PCI device node - * @data: Unused * - * When EEH module is installed during system boot, all PCI devices - * are checked one by one to see if it supports EEH. The function - * is introduced for the purpose. + * When we discover a new PCI device via the device-tree we create a + * corresponding pci_dn and we allocate, but don't initialise, an eeh_dev. + * This function takes care of the initialisation and inserts the eeh_dev + * into the correct eeh_pe. If no eeh_pe exists we'll allocate one. */ -static void *pseries_eeh_probe(struct pci_dn *pdn, void *data) +void pseries_eeh_init_edev(struct pci_dn *pdn) { struct eeh_dev *edev; struct eeh_pe pe; @@ -238,18 +238,35 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data) int enable = 0; int ret; - /* Retrieve OF node and eeh device */ + if (WARN_ON_ONCE(!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))) + return; + + /* + * Find the eeh_dev for this pdn. The storage for the eeh_dev was + * allocated at the same time as the pci_dn. + * + * XXX: We should probably re-visit that. + */ edev = pdn_to_eeh_dev(pdn); - if (!edev || edev->pe) - return NULL; + if (!edev) + return; + + /* + * If ->pe is set then we've already probed this device. We hit + * this path when a pci_dev is removed and rescanned while recovering + * a PE (i.e. for devices where the driver doesn't support error + * recovery). + */ + if (edev->pe) + return; /* Check class/vendor/device IDs */ if (!pdn->vendor_id || !pdn->device_id || !pdn->class_code) - return NULL; + return; /* Skip for PCI-ISA bridge */ if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) - return NULL; + return; eeh_edev_dbg(edev, "Probing device\n"); @@ -316,9 +333,49 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data) /* Save memory bars */ eeh_save_bars(edev); +} + +static struct eeh_dev *pseries_eeh_probe(struct pci_dev *pdev) +{ + struct eeh_dev *edev; + struct pci_dn *pdn; + + pdn = pci_get_pdn_by_devfn(pdev->bus, pdev->devfn); + if (!pdn) + return NULL; + + /* + * If the system supports EEH on this device then the eeh_dev was + * configured and inserted into a PE in pseries_eeh_init_edev() + */ + edev = pdn_to_eeh_dev(pdn); + if (!edev || !edev->pe) + return NULL; + + return edev; +} + +/** + * pseries_eeh_init_edev_recursive - Enable EEH for the indicated device + * @pdn: PCI device node + * + * This routine must be used to perform EEH initialization for the + * indicated PCI device that was added after system boot (e.g. + * hotplug, dlpar). + */ +void pseries_eeh_init_edev_recursive(struct pci_dn *pdn) +{ + struct pci_dn *n; + + if (!pdn) + return; + + list_for_each_entry(n, &pdn->child_list, list) + pseries_eeh_init_edev_recursive(n); - return NULL; + pseries_eeh_init_edev(pdn); } +EXPORT_SYMBOL_GPL(pseries_eeh_init_edev_recursive); /** * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index f3ed1baa6289c5..b2cde173230151 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -223,7 +223,7 @@ static int get_lmb_range(u32 drc_index, int n_lmbs, struct drmem_lmb **end_lmb) { struct drmem_lmb *lmb, *start, *end; - struct drmem_lmb *last_lmb; + struct drmem_lmb *limit; start = NULL; for_each_drmem_lmb(lmb) { @@ -236,10 +236,10 @@ static int get_lmb_range(u32 drc_index, int n_lmbs, if (!start) return -EINVAL; - end = &start[n_lmbs - 1]; + end = &start[n_lmbs]; - last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1]; - if (end > last_lmb) + limit = &drmem_info->lmbs[drmem_info->n_lmbs]; + if (end > limit) return -EINVAL; *start_lmb = start; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 3c3da25b445c6a..e4ed5317f11783 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -636,8 +636,16 @@ static const struct proc_ops vcpudispatch_stats_freq_proc_ops = { static int __init vcpudispatch_stats_procfs_init(void) { - if (!lppaca_shared_proc(get_lppaca())) + /* + * Avoid smp_processor_id while preemptible. All CPUs should have + * the same value for lppaca_shared_proc. + */ + preempt_disable(); + if (!lppaca_shared_proc(get_lppaca())) { + preempt_enable(); return 0; + } + preempt_enable(); if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL, &vcpudispatch_stats_proc_ops)) diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c index 66dfd825671235..23241c71ef3725 100644 --- a/arch/powerpc/platforms/pseries/of_helpers.c +++ b/arch/powerpc/platforms/pseries/of_helpers.c @@ -88,7 +88,7 @@ int of_read_drc_info_cell(struct property **prop, const __be32 **curval, return -EINVAL; /* Should now know end of current entry */ - (*curval) = (void *)p2; + (*curval) = (void *)(++p2); data->last_drc_index = data->drc_index_start + ((data->num_sequential_elems - 1) * data->sequential_inc); diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 0b4467e378e5d3..e4606100e2864f 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -246,8 +246,9 @@ static int papr_scm_meta_set(struct papr_scm_priv *p, return 0; } -int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, - unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) +static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len, int *cmd_rc) { struct nd_cmd_get_config_size *get_size_hdr; struct papr_scm_priv *p; diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 361986e4354e5e..b3a38f5a6b688b 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -37,7 +37,7 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) eeh_dev_phb_init_dynamic(phb); if (dn->child) - eeh_add_device_tree_early(PCI_DN(dn)); + pseries_eeh_init_edev_recursive(PCI_DN(dn)); pcibios_scan_phb(phb); pcibios_finish_adding_to_bus(phb->bus); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 1d7f973c647b37..aa6208c8d4f096 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -558,6 +558,9 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) switch (mce_log->error_type) { case MC_ERROR_TYPE_UE: mce_err.error_type = MCE_ERROR_TYPE_UE; + mce_common_process_ue(regs, &mce_err); + if (mce_err.ignore_event) + disposition = RTAS_DISP_FULLY_RECOVERED; switch (err_sub_type) { case MC_ERROR_UE_IFETCH: mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH; diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index f682b7babc09c1..37f1f25ba8043b 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1628,7 +1628,6 @@ const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length) } EXPORT_SYMBOL(vio_get_attribute); -#ifdef CONFIG_PPC_PSERIES /* vio_find_name() - internal because only vio.c knows how we formatted the * kobject name */ @@ -1698,7 +1697,6 @@ int vio_disable_interrupts(struct vio_dev *dev) return rc; } EXPORT_SYMBOL(vio_disable_interrupts); -#endif /* CONFIG_PPC_PSERIES */ static int __init vio_init(void) { diff --git a/arch/powerpc/platforms/pseries/vphn.c b/arch/powerpc/platforms/pseries/vphn.c index 3f07bf6c670ebd..cca474a2c39694 100644 --- a/arch/powerpc/platforms/pseries/vphn.c +++ b/arch/powerpc/platforms/pseries/vphn.c @@ -82,7 +82,8 @@ long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity) long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, cpu); - vphn_unpack_associativity(retbuf, associativity); + if (rc == H_SUCCESS) + vphn_unpack_associativity(retbuf, associativity); return rc; } diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 9651ca061828a0..b294f70f1a6765 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -68,13 +69,6 @@ static u32 xive_ipi_irq; /* Xive state for each CPU */ static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu); -/* - * A "disabled" interrupt should never fire, to catch problems - * we set its logical number to this - */ -#define XIVE_BAD_IRQ 0x7fffffff -#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) - /* An invalid CPU target */ #define XIVE_INVALID_TARGET (-1) @@ -265,11 +259,15 @@ notrace void xmon_xive_do_dump(int cpu) int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) { + struct irq_chip *chip = irq_data_get_irq_chip(d); int rc; u32 target; u8 prio; u32 lirq; + if (!is_xive_irq(chip)) + return -EINVAL; + rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); if (rc) { xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); @@ -283,7 +281,10 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); u64 val = xive_esb_read(xd, XIVE_ESB_GET); - xmon_printf("PQ=%c%c", + xmon_printf("flags=%c%c%c PQ=%c%c", + xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', + xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', + xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', val & XIVE_ESB_VAL_P ? 'P' : '-', val & XIVE_ESB_VAL_Q ? 'Q' : '-'); } @@ -1150,7 +1151,7 @@ static int xive_setup_cpu_ipi(unsigned int cpu) xc = per_cpu(xive_cpu, cpu); /* Check if we are already setup */ - if (xc->hw_ipi != 0) + if (xc->hw_ipi != XIVE_BAD_IRQ) return 0; /* Grab an IPI from the backend, this will populate xc->hw_ipi */ @@ -1187,7 +1188,7 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) /* Disable the IPI and free the IRQ data */ /* Already cleaned up ? */ - if (xc->hw_ipi == 0) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; /* Mask the IPI */ @@ -1343,6 +1344,7 @@ static int xive_prepare_cpu(unsigned int cpu) if (np) xc->chip_id = of_get_ibm_chip_id(np); of_node_put(np); + xc->hw_ipi = XIVE_BAD_IRQ; per_cpu(xive_cpu, cpu) = xc; } @@ -1554,3 +1556,107 @@ static int __init xive_off(char *arg) return 0; } __setup("xive=off", xive_off); + +void xive_debug_show_cpu(struct seq_file *m, int cpu) +{ + struct xive_cpu *xc = per_cpu(xive_cpu, cpu); + + seq_printf(m, "CPU %d:", cpu); + if (xc) { + seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr); + +#ifdef CONFIG_SMP + { + u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET); + + seq_printf(m, "IPI=0x%08x PQ=%c%c ", xc->hw_ipi, + val & XIVE_ESB_VAL_P ? 'P' : '-', + val & XIVE_ESB_VAL_Q ? 'Q' : '-'); + } +#endif + { + struct xive_q *q = &xc->queue[xive_irq_priority]; + u32 i0, i1, idx; + + if (q->qpage) { + idx = q->idx; + i0 = be32_to_cpup(q->qpage + idx); + idx = (idx + 1) & q->msk; + i1 = be32_to_cpup(q->qpage + idx); + seq_printf(m, "EQ idx=%d T=%d %08x %08x ...", + q->idx, q->toggle, i0, i1); + } + } + } + seq_puts(m, "\n"); +} + +void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d) +{ + struct irq_chip *chip = irq_data_get_irq_chip(d); + int rc; + u32 target; + u8 prio; + u32 lirq; + + if (!is_xive_irq(chip)) + return; + + rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); + if (rc) { + seq_printf(m, "IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); + return; + } + + seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", + hw_irq, target, prio, lirq); + + if (d) { + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + u64 val = xive_esb_read(xd, XIVE_ESB_GET); + + seq_printf(m, "flags=%c%c%c PQ=%c%c", + xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', + xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', + xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', + val & XIVE_ESB_VAL_P ? 'P' : '-', + val & XIVE_ESB_VAL_Q ? 'Q' : '-'); + } + seq_puts(m, "\n"); +} + +static int xive_core_debug_show(struct seq_file *m, void *private) +{ + unsigned int i; + struct irq_desc *desc; + int cpu; + + if (xive_ops->debug_show) + xive_ops->debug_show(m, private); + + for_each_possible_cpu(cpu) + xive_debug_show_cpu(m, cpu); + + for_each_irq_desc(i, desc) { + struct irq_data *d = irq_desc_get_irq_data(desc); + unsigned int hw_irq; + + if (!d) + continue; + + hw_irq = (unsigned int)irqd_to_hwirq(d); + + /* IPIs are special (HW number 0) */ + if (hw_irq) + xive_debug_show_irq(m, hw_irq, d); + } + return 0; +} +DEFINE_SHOW_ATTRIBUTE(xive_core_debug); + +int xive_core_debug_init(void) +{ + debugfs_create_file("xive", 0400, powerpc_debugfs_root, + NULL, &xive_core_debug_fops); + return 0; +} diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 0ff6b739052c8f..5218fdc4b29a96 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -312,7 +313,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) s64 rc; /* Free the IPI */ - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; for (;;) { rc = opal_xive_free_irq(xc->hw_ipi); @@ -320,7 +321,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) msleep(OPAL_BUSY_DELAY_MS); continue; } - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; break; } } @@ -850,3 +851,5 @@ int xive_native_get_vp_state(u32 vp_id, u64 *out_state) return 0; } EXPORT_SYMBOL_GPL(xive_native_get_vp_state); + +machine_arch_initcall(powernv, xive_core_debug_init); diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 55dc61cb4867b1..7ab5c67809977c 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -560,11 +561,11 @@ static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc) static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc) { - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; xive_irq_bitmap_free(xc->hw_ipi); - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; } #endif /* CONFIG_SMP */ @@ -645,6 +646,21 @@ static void xive_spapr_sync_source(u32 hw_irq) plpar_int_sync(0, hw_irq); } +static int xive_spapr_debug_show(struct seq_file *m, void *private) +{ + struct xive_irq_bitmap *xibm; + char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + + list_for_each_entry(xibm, &xive_irq_bitmaps, list) { + memset(buf, 0, PAGE_SIZE); + bitmap_print_to_pagebuf(true, buf, xibm->bitmap, xibm->count); + seq_printf(m, "bitmap #%d: %s", xibm->count, buf); + } + kfree(buf); + + return 0; +} + static const struct xive_ops xive_spapr_ops = { .populate_irq_data = xive_spapr_populate_irq_data, .configure_irq = xive_spapr_configure_irq, @@ -662,6 +678,7 @@ static const struct xive_ops xive_spapr_ops = { #ifdef CONFIG_SMP .get_ipi = xive_spapr_get_ipi, .put_ipi = xive_spapr_put_ipi, + .debug_show = xive_spapr_debug_show, #endif /* CONFIG_SMP */ .name = "spapr", }; @@ -839,3 +856,5 @@ bool __init xive_spapr_init(void) pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); return true; } + +machine_arch_initcall(pseries, xive_core_debug_init); diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index 59cd366e7933a7..b7b901da216887 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -5,6 +5,13 @@ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H +/* + * A "disabled" interrupt should never fire, to catch problems + * we set its logical number to this + */ +#define XIVE_BAD_IRQ 0x7fffffff +#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) + /* Each CPU carry one of these with various per-CPU state */ struct xive_cpu { #ifdef CONFIG_SMP @@ -50,12 +57,14 @@ struct xive_ops { int (*get_ipi)(unsigned int cpu, struct xive_cpu *xc); void (*put_ipi)(unsigned int cpu, struct xive_cpu *xc); #endif + int (*debug_show)(struct seq_file *m, void *private); const char *name; }; bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, u8 max_prio); __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift); +int xive_core_debug_init(void); static inline u32 xive_alloc_order(u32 queue_shift) { diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index c3842dbeb1b75e..6f9cccea54f3ba 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -1,9 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for xmon -# Avoid clang warnings around longjmp/setjmp declarations -subdir-ccflags-y := -ffreestanding - GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0ec9640335bb35..7af840c0fc933b 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -81,8 +81,9 @@ static bool xmon_is_ro = IS_ENABLED(CONFIG_XMON_DEFAULT_RO_MODE); static unsigned long adrs; static int size = 1; -#define MAX_DUMP (128 * 1024) +#define MAX_DUMP (64 * 1024) static unsigned long ndump = 64; +#define MAX_IDUMP (MAX_DUMP >> 2) static unsigned long nidump = 16; static unsigned long ncsum = 4096; static int termch; @@ -2712,7 +2713,12 @@ static void dump_by_size(unsigned long addr, long count, int size) printf("%0*llx", size * 2, val); } - printf("\n"); + printf(" |"); + for (j = 0; j < 16; ++j) { + val = temp[j]; + putchar(' ' <= val && val <= '~' ? val : '.'); + } + printf("|\n"); } } @@ -2756,8 +2762,8 @@ dump(void) scanhex(&nidump); if (nidump == 0) nidump = 16; - else if (nidump > MAX_DUMP) - nidump = MAX_DUMP; + else if (nidump > MAX_IDUMP) + nidump = MAX_IDUMP; adrs += ppc_inst_dump(adrs, nidump, 1); last_cmd = "di\n"; } else if (c == 'l') { diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 56f4bc0d209ec4..8646eb197cd967 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -902,6 +902,7 @@ static struct notifier_block powernv_cpufreq_reboot_nb = { void powernv_cpufreq_work_fn(struct work_struct *work) { struct chip *chip = container_of(work, struct chip, throttle); + struct cpufreq_policy *policy; unsigned int cpu; cpumask_t mask; @@ -916,12 +917,14 @@ void powernv_cpufreq_work_fn(struct work_struct *work) chip->restore = false; for_each_cpu(cpu, &mask) { int index; - struct cpufreq_policy policy; - cpufreq_get_policy(&policy, cpu); - index = cpufreq_table_find_index_c(&policy, policy.cur); - powernv_cpufreq_target_index(&policy, index); - cpumask_andnot(&mask, &mask, policy.cpus); + policy = cpufreq_cpu_get(cpu); + if (!policy) + continue; + index = cpufreq_table_find_index_c(policy, policy->cur); + powernv_cpufreq_target_index(policy, index); + cpumask_andnot(&mask, &mask, policy->cpus); + cpufreq_cpu_put(policy); } out: put_online_cpus(); @@ -1080,6 +1083,12 @@ static int init_chip_info(void) static inline void clean_chip_info(void) { + int i; + + /* flush any pending work items */ + if (chips) + for (i = 0; i < nr_chips; i++) + cancel_work_sync(&chips[i].throttle); kfree(chips); } @@ -1108,9 +1117,6 @@ static int __init powernv_cpufreq_init(void) if (rc) goto out; - register_reboot_notifier(&powernv_cpufreq_reboot_nb); - opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); - if (powernv_pstate_info.wof_enabled) powernv_cpufreq_driver.boost_enabled = true; else @@ -1119,15 +1125,17 @@ static int __init powernv_cpufreq_init(void) rc = cpufreq_register_driver(&powernv_cpufreq_driver); if (rc) { pr_info("Failed to register the cpufreq driver (%d)\n", rc); - goto cleanup_notifiers; + goto cleanup; } if (powernv_pstate_info.wof_enabled) cpufreq_enable_boost_support(); + register_reboot_notifier(&powernv_cpufreq_reboot_nb); + opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); + return 0; -cleanup_notifiers: - unregister_all_notifiers(); +cleanup: clean_chip_info(); out: pr_info("Platform driver disabled. System does not support PState control\n"); diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index 977946e4e6132f..c5eb509c72f0f3 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -140,7 +140,7 @@ static void dlpar_pci_add_bus(struct device_node *dn) struct pci_controller *phb = pdn->phb; struct pci_dev *dev = NULL; - eeh_add_device_tree_early(pdn); + pseries_eeh_init_edev_recursive(pdn); /* Add EADS device to PHB bus, adding new entry to bus->devices */ dev = of_create_pci_dev(dn, phb->bus, pdn->devfn); diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index e408e4021ceeb5..6504869efabceb 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -288,11 +288,10 @@ EXPORT_SYMBOL_GPL(rpaphp_check_drc_props); static int is_php_type(char *drc_type) { - unsigned long value; char *endptr; /* PCI Hotplug nodes have an integer for drc_type */ - value = simple_strtoul(drc_type, &endptr, 10); + simple_strtoul(drc_type, &endptr, 10); if (endptr == drc_type) return 0; @@ -494,6 +493,8 @@ static int enable_slot(struct hotplug_slot *hotplug_slot) return retval; if (state == PRESENT) { + pseries_eeh_init_edev_recursive(PCI_DN(slot->dn)); + pci_lock_rescan_remove(); pci_hp_add_devices(slot->bus); pci_unlock_rescan_remove(); diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c index beca61badeea4c..c380bdacd1466c 100644 --- a/drivers/pci/hotplug/rpaphp_pci.c +++ b/drivers/pci/hotplug/rpaphp_pci.c @@ -95,8 +95,10 @@ int rpaphp_enable_slot(struct slot *slot) return -EINVAL; } - if (list_empty(&bus->devices)) + if (list_empty(&bus->devices)) { + pseries_eeh_init_edev_recursive(PCI_DN(slot->dn)); pci_hp_add_devices(bus); + } if (!list_empty(&bus->devices)) { slot->state = CONFIGURED; diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c index 769e0a5d1dfccf..3c6dd06ec5fb13 100644 --- a/drivers/tty/ehv_bytechan.c +++ b/drivers/tty/ehv_bytechan.c @@ -136,6 +136,21 @@ static int find_console_handle(void) return 1; } +static unsigned int local_ev_byte_channel_send(unsigned int handle, + unsigned int *count, + const char *p) +{ + char buffer[EV_BYTE_CHANNEL_MAX_BYTES]; + unsigned int c = *count; + + if (c < sizeof(buffer)) { + memcpy(buffer, p, c); + memset(&buffer[c], 0, sizeof(buffer) - c); + p = buffer; + } + return ev_byte_channel_send(handle, count, p); +} + /*************************** EARLY CONSOLE DRIVER ***************************/ #ifdef CONFIG_PPC_EARLY_DEBUG_EHV_BC @@ -154,7 +169,7 @@ static void byte_channel_spin_send(const char data) do { count = 1; - ret = ev_byte_channel_send(CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE, + ret = local_ev_byte_channel_send(CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE, &count, &data); } while (ret == EV_EAGAIN); } @@ -221,7 +236,7 @@ static int ehv_bc_console_byte_channel_send(unsigned int handle, const char *s, while (count) { len = min_t(unsigned int, count, EV_BYTE_CHANNEL_MAX_BYTES); do { - ret = ev_byte_channel_send(handle, &len, s); + ret = local_ev_byte_channel_send(handle, &len, s); } while (ret == EV_EAGAIN); count -= len; s += len; @@ -401,7 +416,7 @@ static void ehv_bc_tx_dequeue(struct ehv_bc_data *bc) CIRC_CNT_TO_END(bc->head, bc->tail, BUF_SIZE), EV_BYTE_CHANNEL_MAX_BYTES); - ret = ev_byte_channel_send(bc->handle, &len, bc->buf + bc->tail); + ret = local_ev_byte_channel_send(bc->handle, &len, bc->buf + bc->tail); /* 'len' is valid only if the return code is 0 or EV_EAGAIN */ if (!ret || (ret == EV_EAGAIN)) diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 5afe0e7ff7cd1c..fbb117757c5248 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -425,6 +425,25 @@ EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group); int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, struct kobject *target_kobj, const char *target_name) +{ + return compat_only_sysfs_link_entry_to_kobj(kobj, target_kobj, + target_name, NULL); +} +EXPORT_SYMBOL_GPL(__compat_only_sysfs_link_entry_to_kobj); + +/** + * compat_only_sysfs_link_entry_to_kobj - add a symlink to a kobject pointing + * to a group or an attribute + * @kobj: The kobject containing the group. + * @target_kobj: The target kobject. + * @target_name: The name of the target group or attribute. + * @symlink_name: The name of the symlink file (target_name will be + * considered if symlink_name is NULL). + */ +int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name, + const char *symlink_name) { struct kernfs_node *target; struct kernfs_node *entry; @@ -449,15 +468,18 @@ int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, return -ENOENT; } - link = kernfs_create_link(kobj->sd, target_name, entry); + if (!symlink_name) + symlink_name = target_name; + + link = kernfs_create_link(kobj->sd, symlink_name, entry); if (PTR_ERR(link) == -EEXIST) - sysfs_warn_dup(kobj->sd, target_name); + sysfs_warn_dup(kobj->sd, symlink_name); kernfs_put(entry); kernfs_put(target); return PTR_ERR_OR_ZERO(link); } -EXPORT_SYMBOL_GPL(__compat_only_sysfs_link_entry_to_kobj); +EXPORT_SYMBOL_GPL(compat_only_sysfs_link_entry_to_kobj); static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn, const struct attribute_group *grp, diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 4beb51009b6217..fd0fcb4d4f4d99 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -300,6 +300,10 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, struct kobject *target_kobj, const char *target_name); +int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name, + const char *symlink_name); void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); @@ -520,6 +524,14 @@ static inline int __compat_only_sysfs_link_entry_to_kobj( return 0; } +static inline int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name, + const char *symlink_name) +{ + return 0; +} + static inline void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index d40300a65b42f7..a32a6ab8991449 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -2,6 +2,8 @@ TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall TEST_GEN_FILES := exec_target +TEST_FILES := settings + CFLAGS += -O2 top_srcdir = ../../../../.. diff --git a/tools/testing/selftests/powerpc/benchmarks/settings b/tools/testing/selftests/powerpc/benchmarks/settings new file mode 100644 index 00000000000000..e7b9417537fbc4 --- /dev/null +++ b/tools/testing/selftests/powerpc/benchmarks/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile index 5df476364b4d46..cfa6eedcb66ca2 100644 --- a/tools/testing/selftests/powerpc/dscr/Makefile +++ b/tools/testing/selftests/powerpc/dscr/Makefile @@ -3,6 +3,8 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \ dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \ dscr_sysfs_thread_test +TEST_FILES := settings + top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/dscr/settings b/tools/testing/selftests/powerpc/dscr/settings new file mode 100644 index 00000000000000..e7b9417537fbc4 --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 7cf7ad261d02d0..2ca523255b1b22 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -7,3 +7,4 @@ segv_errors wild_bctr large_vm_fork_separation bad_accesses +tlbie_test diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index 417306353e07bb..ca35dd8848b0aa 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -7,6 +7,7 @@ noarg: # The EBB handler is 64-bit code and everything links against it CFLAGS += -m64 +TMPOUT = $(OUTPUT)/ # Toolchains may build PIE by default which breaks the assembly no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore index f897b55a44dd5e..405b5364044cd9 100644 --- a/tools/testing/selftests/powerpc/signal/.gitignore +++ b/tools/testing/selftests/powerpc/signal/.gitignore @@ -2,3 +2,4 @@ signal signal_tm sigfuz +sigreturn_vdso diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index 113838fbbe7f7d..932a032bf03616 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -1,10 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := signal signal_tm sigfuz +TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso CFLAGS += -maltivec $(OUTPUT)/signal_tm: CFLAGS += -mhtm $(OUTPUT)/sigfuz: CFLAGS += -pthread -m64 +TEST_FILES := settings + top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/signal/settings b/tools/testing/selftests/powerpc/signal/settings new file mode 100644 index 00000000000000..e7b9417537fbc4 --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c b/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c new file mode 100644 index 00000000000000..e282fff0fe259e --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that we can take signals with and without the VDSO mapped, which trigger + * different paths in the signal handling code. + * + * See handle_rt_signal64() and setup_trampoline() in signal_64.c + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include + +// Ensure assert() is not compiled out +#undef NDEBUG +#include + +#include "utils.h" + +static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high) +{ + unsigned long start, end; + static char buf[4096]; + char name[128]; + FILE *f; + int rc = -1; + + f = fopen("/proc/self/maps", "r"); + if (!f) { + perror("fopen"); + return -1; + } + + while (fgets(buf, sizeof(buf), f)) { + rc = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n", + &start, &end, name); + if (rc == 2) + continue; + + if (rc != 3) { + printf("sscanf errored\n"); + rc = -1; + break; + } + + if (strstr(name, needle)) { + *low = start; + *high = end - 1; + rc = 0; + break; + } + } + + fclose(f); + + return rc; +} + +static volatile sig_atomic_t took_signal = 0; + +static void sigusr1_handler(int sig) +{ + took_signal++; +} + +int test_sigreturn_vdso(void) +{ + unsigned long low, high, size; + struct sigaction act; + char *p; + + act.sa_handler = sigusr1_handler; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + + assert(sigaction(SIGUSR1, &act, NULL) == 0); + + // Confirm the VDSO is mapped, and work out where it is + assert(search_proc_maps("[vdso]", &low, &high) == 0); + size = high - low + 1; + printf("VDSO is at 0x%lx-0x%lx (%lu bytes)\n", low, high, size); + + kill(getpid(), SIGUSR1); + assert(took_signal == 1); + printf("Signal delivered OK with VDSO mapped\n"); + + // Remap the VDSO somewhere else + p = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + assert(p != MAP_FAILED); + assert(mremap((void *)low, size, size, MREMAP_MAYMOVE|MREMAP_FIXED, p) != MAP_FAILED); + assert(search_proc_maps("[vdso]", &low, &high) == 0); + size = high - low + 1; + printf("VDSO moved to 0x%lx-0x%lx (%lu bytes)\n", low, high, size); + + kill(getpid(), SIGUSR1); + assert(took_signal == 2); + printf("Signal delivered OK with VDSO moved\n"); + + assert(munmap((void *)low, size) == 0); + printf("Unmapped VDSO\n"); + + // Confirm the VDSO is not mapped anymore + assert(search_proc_maps("[vdso]", &low, &high) != 0); + + // Make the stack executable + assert(search_proc_maps("[stack]", &low, &high) == 0); + size = high - low + 1; + mprotect((void *)low, size, PROT_READ|PROT_WRITE|PROT_EXEC); + printf("Remapped the stack executable\n"); + + kill(getpid(), SIGUSR1); + assert(took_signal == 3); + printf("Signal delivered OK with VDSO unmapped\n"); + + return 0; +} + +int main(void) +{ + return test_harness(test_sigreturn_vdso, "sigreturn_vdso"); +} diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 7baf2a46002f38..d8900a0c47a1f7 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -14,6 +14,7 @@ tm-signal-context-chk-vmx tm-signal-context-chk-vsx tm-signal-context-force-tm tm-signal-sigreturn-nt +tm-signal-pagefault tm-vmx-unavail tm-unavailable tm-trap diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index b15a1a325bd037..0b0db8d3857c83 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -5,7 +5,9 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \ - tm-signal-context-force-tm tm-poison + tm-signal-context-force-tm tm-poison tm-signal-pagefault + +TEST_FILES := settings top_srcdir = ../../../../.. include ../../lib.mk @@ -22,6 +24,7 @@ $(OUTPUT)/tm-resched-dscr: ../pmu/lib.c $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx $(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64 $(OUTPUT)/tm-signal-context-force-tm: CFLAGS += -pthread -m64 +$(OUTPUT)/tm-signal-pagefault: CFLAGS += -pthread -m64 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS)) $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S diff --git a/tools/testing/selftests/powerpc/tm/settings b/tools/testing/selftests/powerpc/tm/settings new file mode 100644 index 00000000000000..e7b9417537fbc4 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c index 31717625f31861..421cb082f6befc 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c @@ -42,9 +42,10 @@ #endif /* Setting contexts because the test will crash and we want to recover */ -ucontext_t init_context, main_context; +ucontext_t init_context; -static int count, first_time; +/* count is changed in the signal handler, so it must be volatile */ +static volatile int count; void usr_signal_handler(int signo, siginfo_t *si, void *uc) { @@ -98,11 +99,6 @@ void usr_signal_handler(int signo, siginfo_t *si, void *uc) void seg_signal_handler(int signo, siginfo_t *si, void *uc) { - if (count == COUNT_MAX) { - /* Return to tm_signal_force_msr() and exit */ - setcontext(&main_context); - } - count++; /* Reexecute the test */ @@ -126,37 +122,41 @@ void tm_trap_test(void) */ getcontext(&init_context); - /* Allocated an alternative signal stack area */ - ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); - ss.ss_size = SIGSTKSZ; - ss.ss_flags = 0; + while (count < COUNT_MAX) { + /* Allocated an alternative signal stack area */ + ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + ss.ss_size = SIGSTKSZ; + ss.ss_flags = 0; - if (ss.ss_sp == (void *)-1) { - perror("mmap error\n"); - exit(-1); - } + if (ss.ss_sp == (void *)-1) { + perror("mmap error\n"); + exit(-1); + } - /* Force the allocation through a page fault */ - if (madvise(ss.ss_sp, SIGSTKSZ, MADV_DONTNEED)) { - perror("madvise\n"); - exit(-1); - } + /* Force the allocation through a page fault */ + if (madvise(ss.ss_sp, SIGSTKSZ, MADV_DONTNEED)) { + perror("madvise\n"); + exit(-1); + } - /* Setting an alternative stack to generate a page fault when - * the signal is raised. - */ - if (sigaltstack(&ss, NULL)) { - perror("sigaltstack\n"); - exit(-1); + /* + * Setting an alternative stack to generate a page fault when + * the signal is raised. + */ + if (sigaltstack(&ss, NULL)) { + perror("sigaltstack\n"); + exit(-1); + } + + /* The signal handler will enable MSR_TS */ + sigaction(SIGUSR1, &usr_sa, NULL); + /* If it does not crash, it might segfault, avoid it to retest */ + sigaction(SIGSEGV, &seg_sa, NULL); + + raise(SIGUSR1); + count++; } - - /* The signal handler will enable MSR_TS */ - sigaction(SIGUSR1, &usr_sa, NULL); - /* If it does not crash, it will segfault, avoid it to retest */ - sigaction(SIGSEGV, &seg_sa, NULL); - - raise(SIGUSR1); } int tm_signal_context_force_tm(void) @@ -169,11 +169,7 @@ int tm_signal_context_force_tm(void) */ SKIP_IF(!is_ppc64le()); - /* Will get back here after COUNT_MAX interactions */ - getcontext(&main_context); - - if (!first_time++) - tm_trap_test(); + tm_trap_test(); return EXIT_SUCCESS; } diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c new file mode 100644 index 00000000000000..5908bc6abe60c9 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2020, Gustavo Luiz Duarte, IBM Corp. + * + * This test starts a transaction and triggers a signal, forcing a pagefault to + * happen when the kernel signal handling code touches the user signal stack. + * + * In order to avoid pre-faulting the signal stack memory and to force the + * pagefault to happen precisely in the kernel signal handling code, the + * pagefault handling is done in userspace using the userfaultfd facility. + * + * Further pagefaults are triggered by crafting the signal handler's ucontext + * to point to additional memory regions managed by the userfaultfd, so using + * the same mechanism used to avoid pre-faulting the signal stack memory. + * + * On failure (bug is present) kernel crashes or never returns control back to + * userspace. If bug is not present, tests completes almost immediately. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tm.h" + + +#define UF_MEM_SIZE 655360 /* 10 x 64k pages */ + +/* Memory handled by userfaultfd */ +static char *uf_mem; +static size_t uf_mem_offset = 0; + +/* + * Data that will be copied into the faulting pages (instead of zero-filled + * pages). This is used to make the test more reliable and avoid segfaulting + * when we return from the signal handler. Since we are making the signal + * handler's ucontext point to newly allocated memory, when that memory is + * paged-in it will contain the expected content. + */ +static char backing_mem[UF_MEM_SIZE]; + +static size_t pagesize; + +/* + * Return a chunk of at least 'size' bytes of memory that will be handled by + * userfaultfd. If 'backing_data' is not NULL, its content will be save to + * 'backing_mem' and then copied into the faulting pages when the page fault + * is handled. + */ +void *get_uf_mem(size_t size, void *backing_data) +{ + void *ret; + + if (uf_mem_offset + size > UF_MEM_SIZE) { + fprintf(stderr, "Requesting more uf_mem than expected!\n"); + exit(EXIT_FAILURE); + } + + ret = &uf_mem[uf_mem_offset]; + + /* Save the data that will be copied into the faulting page */ + if (backing_data != NULL) + memcpy(&backing_mem[uf_mem_offset], backing_data, size); + + /* Reserve the requested amount of uf_mem */ + uf_mem_offset += size; + /* Keep uf_mem_offset aligned to the page size (round up) */ + uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1); + + return ret; +} + +void *fault_handler_thread(void *arg) +{ + struct uffd_msg msg; /* Data read from userfaultfd */ + long uffd; /* userfaultfd file descriptor */ + struct uffdio_copy uffdio_copy; + struct pollfd pollfd; + ssize_t nread, offset; + + uffd = (long) arg; + + for (;;) { + pollfd.fd = uffd; + pollfd.events = POLLIN; + if (poll(&pollfd, 1, -1) == -1) { + perror("poll() failed"); + exit(EXIT_FAILURE); + } + + nread = read(uffd, &msg, sizeof(msg)); + if (nread == 0) { + fprintf(stderr, "read(): EOF on userfaultfd\n"); + exit(EXIT_FAILURE); + } + + if (nread == -1) { + perror("read() failed"); + exit(EXIT_FAILURE); + } + + /* We expect only one kind of event */ + if (msg.event != UFFD_EVENT_PAGEFAULT) { + fprintf(stderr, "Unexpected event on userfaultfd\n"); + exit(EXIT_FAILURE); + } + + /* + * We need to handle page faults in units of pages(!). + * So, round faulting address down to page boundary. + */ + uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1); + + offset = (char *) uffdio_copy.dst - uf_mem; + uffdio_copy.src = (unsigned long) &backing_mem[offset]; + + uffdio_copy.len = pagesize; + uffdio_copy.mode = 0; + uffdio_copy.copy = 0; + if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) { + perror("ioctl-UFFDIO_COPY failed"); + exit(EXIT_FAILURE); + } + } +} + +void setup_uf_mem(void) +{ + long uffd; /* userfaultfd file descriptor */ + pthread_t thr; + struct uffdio_api uffdio_api; + struct uffdio_register uffdio_register; + int ret; + + pagesize = sysconf(_SC_PAGE_SIZE); + + /* Create and enable userfaultfd object */ + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd == -1) { + perror("userfaultfd() failed"); + exit(EXIT_FAILURE); + } + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { + perror("ioctl-UFFDIO_API failed"); + exit(EXIT_FAILURE); + } + + /* + * Create a private anonymous mapping. The memory will be demand-zero + * paged, that is, not yet allocated. When we actually touch the memory + * the related page will be allocated via the userfaultfd mechanism. + */ + uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (uf_mem == MAP_FAILED) { + perror("mmap() failed"); + exit(EXIT_FAILURE); + } + + /* + * Register the memory range of the mapping we've just mapped to be + * handled by the userfaultfd object. In 'mode' we request to track + * missing pages (i.e. pages that have not yet been faulted-in). + */ + uffdio_register.range.start = (unsigned long) uf_mem; + uffdio_register.range.len = UF_MEM_SIZE; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { + perror("ioctl-UFFDIO_REGISTER"); + exit(EXIT_FAILURE); + } + + /* Create a thread that will process the userfaultfd events */ + ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd); + if (ret != 0) { + fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret); + exit(EXIT_FAILURE); + } +} + +/* + * Assumption: the signal was delivered while userspace was in transactional or + * suspended state, i.e. uc->uc_link != NULL. + */ +void signal_handler(int signo, siginfo_t *si, void *uc) +{ + ucontext_t *ucp = uc; + + /* Skip 'trap' after returning, otherwise we get a SIGTRAP again */ + ucp->uc_link->uc_mcontext.regs->nip += 4; + + ucp->uc_mcontext.v_regs = + get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs); + + ucp->uc_link->uc_mcontext.v_regs = + get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs); + + ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link); +} + +bool have_userfaultfd(void) +{ + long rc; + + errno = 0; + rc = syscall(__NR_userfaultfd, -1); + + return rc == 0 || errno != ENOSYS; +} + +int tm_signal_pagefault(void) +{ + struct sigaction sa; + stack_t ss; + + SKIP_IF(!have_htm()); + SKIP_IF(!have_userfaultfd()); + + setup_uf_mem(); + + /* + * Set an alternative stack that will generate a page fault when the + * signal is raised. The page fault will be treated via userfaultfd, + * i.e. via fault_handler_thread. + */ + ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL); + ss.ss_size = SIGSTKSZ; + ss.ss_flags = 0; + if (sigaltstack(&ss, NULL) == -1) { + perror("sigaltstack() failed"); + exit(EXIT_FAILURE); + } + + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + sa.sa_sigaction = signal_handler; + if (sigaction(SIGTRAP, &sa, NULL) == -1) { + perror("sigaction() failed"); + exit(EXIT_FAILURE); + } + + /* Trigger a SIGTRAP in transactional state */ + asm __volatile__( + "tbegin.;" + "beq 1f;" + "trap;" + "1: ;" + : : : "memory"); + + /* Trigger a SIGTRAP in suspended state */ + asm __volatile__( + "tbegin.;" + "beq 1f;" + "tsuspend.;" + "trap;" + "tresume.;" + "1: ;" + : : : "memory"); + + return EXIT_SUCCESS; +} + +int main(int argc, char **argv) +{ + /* + * Depending on kernel config, the TM Bad Thing might not result in a + * crash, instead the kernel never returns control back to userspace, so + * set a tight timeout. If the test passes it completes almost + * immediately. + */ + test_harness_set_timeout(2); + return test_harness(tm_signal_pagefault, "tm_signal_pagefault"); +}