Merge tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linu…

…x/kernel/git/greentime/linux Pull nds32 updates from Greentime Hu: - Perf support - Power management support - FPU support - Hardware prefetcher support - Build error fixed - Performance enhancement * tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux: nds32: support hardware prefetcher nds32: Fix the items of hwcap_str ordering issue. math-emu/soft-fp.h: (_FP_ROUND_ZERO) cast 0 to void to fix warning math-emu/op-2.h: Use statement expressions to prevent negative constant shift nds32: support denormalized result through FP emulator nds32: Support FP emulation nds32: nds32 FPU port nds32: Remove duplicated include from pm.c nds32: Power management for nds32 nds32: Add document for NDS32 PMU. nds32: Add perf call-graph support. nds32: Perf porting nds32: Fix bug in bitfield.h nds32: Fix gcc 8.0 compiler option incompatible. nds32: Fill all TLB entries with kernel image mapping nds32: Remove the redundant assignment
kawasaki · Dec 29, 2018 · 889bb74 · 889bb74
2 parents 903b77c + e2f3f8b
commit 889bb74
Show file tree

Hide file tree

Showing 65 changed files with 4,441 additions and 89 deletions.
diff --git a/Documentation/devicetree/bindings/perf/nds32v3-pmu.txt b/Documentation/devicetree/bindings/perf/nds32v3-pmu.txt
@@ -0,0 +1,17 @@
+* NDS32 Performance Monitor Units
+
+NDS32 core have a PMU for counting cpu and cache events like cache misses.
+The NDS32 PMU representation in the device tree should be done as under:
+
+Required properties:
+
+- compatible :
+	"andestech,nds32v3-pmu"
+
+- interrupts : The interrupt number for NDS32 PMU is 13.
+
+Example:
+pmu{
+	compatible = "andestech,nds32v3-pmu";
+	interrupts = <13>;
+}
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
@@ -28,7 +28,9 @@ config NDS32
 	select HANDLE_DOMAIN_IRQ
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DEBUG_KMEMLEAK
+	select HAVE_EXIT_THREAD
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_PERF_EVENTS
 	select IRQ_DOMAIN
 	select LOCKDEP_SUPPORT
 	select MODULES_USE_ELF_RELA
@@ -91,3 +93,13 @@ endmenu
 menu "Kernel Features"
 source "kernel/Kconfig.hz"
 endmenu
+
+menu "Power management options"
+config SYS_SUPPORTS_APM_EMULATION
+	bool
+
+config ARCH_SUSPEND_POSSIBLE
+	def_bool y
+
+source "kernel/power/Kconfig"
+endmenu
diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu
@@ -7,6 +7,40 @@ config CPU_LITTLE_ENDIAN
 	bool "Little endian"
 	default y
 
+config FPU
+	bool "FPU support"
+	default n
+	help
+	  If FPU ISA is used in user space, this configuration shall be Y to
+          enable required support in kerenl such as fpu context switch and
+          fpu exception handler.
+
+	  If no FPU ISA is used in user space, say N.
+
+config LAZY_FPU
+	bool "lazy FPU support"
+	depends on FPU
+	default y
+	help
+	  Say Y here to enable the lazy FPU scheme. The lazy FPU scheme can
+          enhance system performance by reducing the context switch
+	  frequency of the FPU register.
+
+	  For nomal case, say Y.
+
+config SUPPORT_DENORMAL_ARITHMETIC
+	bool "Denormal arithmetic support"
+	depends on FPU
+	default n
+	help
+	  Say Y here to enable arithmetic of denormalized number. Enabling
+	  this feature can enhance the precision for tininess number.
+	  However, performance loss in float pointe calculations is
+	  possibly significant due to additional FPU exception.
+
+	  If the calculated tolerance for tininess number is not critical,
+	  say N to prevent performance loss.
+
 config HWZOL
 	bool "hardware zero overhead loop support"
 	depends on CPU_D10 || CPU_D15
@@ -143,6 +177,13 @@ config CACHE_L2
 	  Say Y here to enable L2 cache if your SoC are integrated with L2CC.
 	  If unsure, say N.
 
+config HW_PRE
+	bool "Enable hardware prefetcher"
+	default y
+	help
+	  Say Y here to enable hardware prefetcher feature.
+	  Only when CPU_VER.REV >= 0x09 can support.
+
 menu "Memory configuration"
 
 choice

diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile
@@ -5,10 +5,14 @@ KBUILD_DEFCONFIG := defconfig
 
 comma = ,
 
+
 ifdef CONFIG_FUNCTION_TRACER
 arch-y += -malways-save-lp -mno-relax
 endif
 
+# Avoid generating FPU instructions
+arch-y  += -mno-ext-fpu-sp -mno-ext-fpu-dp -mfloat-abi=soft
+
 KBUILD_CFLAGS	+= $(call cc-option, -mno-sched-prolog-epilog)
 KBUILD_CFLAGS	+= -mcmodel=large
 
@@ -26,6 +30,7 @@ export	TEXTADDR
 
 # If we have a machine-specific directory, then include it in the build.
 core-y				+= arch/nds32/kernel/ arch/nds32/mm/
+core-$(CONFIG_FPU)              += arch/nds32/math-emu/
 libs-y				+= arch/nds32/lib/
 
 ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""'

diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts
@@ -82,4 +82,9 @@
 			interrupts = <18>;
 		};
 	};
+
+	pmu {
+		compatible = "andestech,nds32v3-pmu";
+		interrupts= <13>;
+	};
 };
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
@@ -36,6 +36,7 @@ generic-y += kprobes.h
 generic-y += kvm_para.h
 generic-y += limits.h
 generic-y += local.h
+generic-y += local64.h
 generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += parport.h

diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h
@@ -251,6 +251,11 @@
 #define ITYPE_mskSTYPE		( 0xF  << ITYPE_offSTYPE )
 #define ITYPE_mskCPID		( 0x3  << ITYPE_offCPID )
 
+/* Additional definitions of ITYPE register for FPU */
+#define FPU_DISABLE_EXCEPTION	(0x1  << ITYPE_offSTYPE)
+#define FPU_EXCEPTION		(0x2  << ITYPE_offSTYPE)
+#define FPU_CPID		0	/* FPU Co-Processor ID is 0 */
+
 #define NDS32_VECTOR_mskNONEXCEPTION	0x78
 #define NDS32_VECTOR_offEXCEPTION	8
 #define NDS32_VECTOR_offINTERRUPT	9
@@ -692,8 +697,8 @@
 #define PFM_CTL_offKU1		13	/* Enable user mode event counting for PFMC1 */
 #define PFM_CTL_offKU2		14	/* Enable user mode event counting for PFMC2 */
 #define PFM_CTL_offSEL0		15	/* The event selection for PFMC0 */
-#define PFM_CTL_offSEL1		21	/* The event selection for PFMC1 */
-#define PFM_CTL_offSEL2		27	/* The event selection for PFMC2 */
+#define PFM_CTL_offSEL1		16	/* The event selection for PFMC1 */
+#define PFM_CTL_offSEL2		22	/* The event selection for PFMC2 */
 /* bit 28:31 reserved */
 
 #define PFM_CTL_mskEN0		( 0x01  << PFM_CTL_offEN0 )
@@ -735,14 +740,20 @@
 #define N13MISC_CTL_offRTP	1	/* Disable Return Target Predictor */
 #define N13MISC_CTL_offPTEPF	2	/* Disable HPTWK L2 PTE pefetch */
 #define N13MISC_CTL_offSP_SHADOW_EN	4	/* Enable shadow stack pointers */
+#define MISC_CTL_offHWPRE      11      /* Enable HardWare PREFETCH */
 /* bit 6, 9:31 reserved */
 
 #define N13MISC_CTL_makBTB	( 0x1  << N13MISC_CTL_offBTB )
 #define N13MISC_CTL_makRTP	( 0x1  << N13MISC_CTL_offRTP )
 #define N13MISC_CTL_makPTEPF	( 0x1  << N13MISC_CTL_offPTEPF )
 #define N13MISC_CTL_makSP_SHADOW_EN	( 0x1  << N13MISC_CTL_offSP_SHADOW_EN )
+#define MISC_CTL_makHWPRE_EN     ( 0x1  << MISC_CTL_offHWPRE )
 
+#ifdef CONFIG_HW_PRE
+#define MISC_init	(N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN|MISC_CTL_makHWPRE_EN)
+#else
 #define MISC_init	(N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN)
+#endif
 
 /******************************************************************************
  * PRUSR_ACC_CTL (Privileged Resource User Access Control Registers)
@@ -926,6 +937,7 @@
 #define FPCSR_mskDNIT           ( 0x1  << FPCSR_offDNIT )
 #define FPCSR_mskRIT		( 0x1  << FPCSR_offRIT )
 #define FPCSR_mskALL		(FPCSR_mskIVO | FPCSR_mskDBZ | FPCSR_mskOVF | FPCSR_mskUDF | FPCSR_mskIEX)
+#define FPCSR_mskALLE_NO_UDFE	(FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskIEXE)
 #define FPCSR_mskALLE		(FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskUDFE | FPCSR_mskIEXE)
 #define FPCSR_mskALLT           (FPCSR_mskIVOT | FPCSR_mskDBZT | FPCSR_mskOVFT | FPCSR_mskUDFT | FPCSR_mskIEXT |FPCSR_mskDNIT | FPCSR_mskRIT)
 
@@ -946,6 +958,15 @@
 #define FPCFG_mskIMVER		( 0x1F  << FPCFG_offIMVER )
 #define FPCFG_mskAVER		( 0x1F  << FPCFG_offAVER )
 
+/* 8 Single precision or 4 double precision registers are available */
+#define SP8_DP4_reg		0
+/* 16 Single precision or 8 double precision registers are available */
+#define SP16_DP8_reg		1
+/* 32 Single precision or 16 double precision registers are available */
+#define SP32_DP16_reg		2
+/* 32 Single precision or 32 double precision registers are available */
+#define SP32_DP32_reg		3
+
 /******************************************************************************
  * fucpr: FUCOP_CTL (FPU and Coprocessor Enable Control Register)
  *****************************************************************************/

diff --git a/arch/nds32/include/asm/elf.h b/arch/nds32/include/asm/elf.h
@@ -9,6 +9,7 @@
  */
 
 #include <asm/ptrace.h>
+#include <asm/fpu.h>
 
 typedef unsigned long elf_greg_t;
 typedef unsigned long elf_freg_t[3];
@@ -159,8 +160,18 @@ struct elf32_hdr;
 
 #endif
 
+
+#if IS_ENABLED(CONFIG_FPU)
+#define FPU_AUX_ENT	NEW_AUX_ENT(AT_FPUCW, FPCSR_INIT)
+#else
+#define FPU_AUX_ENT	NEW_AUX_ENT(AT_IGNORE, 0)
+#endif
+
 #define ARCH_DLINFO						\
 do {								\
+	/* Optional FPU initialization */			\
+	FPU_AUX_ENT;						\
+								\
 	NEW_AUX_ENT(AT_SYSINFO_EHDR,				\
 		    (elf_addr_t)current->mm->context.vdso);	\
 } while (0)

diff --git a/arch/nds32/include/asm/fpu.h b/arch/nds32/include/asm/fpu.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __ASM_NDS32_FPU_H
+#define __ASM_NDS32_FPU_H
+
+#if IS_ENABLED(CONFIG_FPU)
+#ifndef __ASSEMBLY__
+#include <linux/sched/task_stack.h>
+#include <linux/preempt.h>
+#include <asm/ptrace.h>
+
+extern bool has_fpu;
+
+extern void save_fpu(struct task_struct *__tsk);
+extern void load_fpu(const struct fpu_struct *fpregs);
+extern bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs);
+extern int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu);
+
+#define test_tsk_fpu(regs)	(regs->fucop_ctl & FUCOP_CTL_mskCP0EN)
+
+/*
+ * Initially load the FPU with signalling NANS.  This bit pattern
+ * has the property that no matter whether considered as single or as
+ * double precision, it still represents a signalling NAN.
+ */
+
+#define sNAN64    0xFFFFFFFFFFFFFFFFULL
+#define sNAN32    0xFFFFFFFFUL
+
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+/*
+ * Denormalized number is unsupported by nds32 FPU. Hence the operation
+ * is treated as underflow cases when the final result is a denormalized
+ * number. To enhance precision, underflow exception trap should be
+ * enabled by default and kerenl will re-execute it by fpu emulator
+ * when getting underflow exception.
+ */
+#define FPCSR_INIT  FPCSR_mskUDFE
+#else
+#define FPCSR_INIT  0x0UL
+#endif
+
+extern const struct fpu_struct init_fpuregs;
+
+static inline void disable_ptreg_fpu(struct pt_regs *regs)
+{
+	regs->fucop_ctl &= ~FUCOP_CTL_mskCP0EN;
+}
+
+static inline void enable_ptreg_fpu(struct pt_regs *regs)
+{
+	regs->fucop_ctl |= FUCOP_CTL_mskCP0EN;
+}
+
+static inline void enable_fpu(void)
+{
+	unsigned long fucop_ctl;
+
+	fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) | FUCOP_CTL_mskCP0EN;
+	__nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
+	__nds32__isb();
+}
+
+static inline void disable_fpu(void)
+{
+	unsigned long fucop_ctl;
+
+	fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) & ~FUCOP_CTL_mskCP0EN;
+	__nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
+	__nds32__isb();
+}
+
+static inline void lose_fpu(void)
+{
+	preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+	if (last_task_used_math == current) {
+		last_task_used_math = NULL;
+#else
+	if (test_tsk_fpu(task_pt_regs(current))) {
+#endif
+		save_fpu(current);
+	}
+	disable_ptreg_fpu(task_pt_regs(current));
+	preempt_enable();
+}
+
+static inline void own_fpu(void)
+{
+	preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+	if (last_task_used_math != current) {
+		if (last_task_used_math != NULL)
+			save_fpu(last_task_used_math);
+		load_fpu(&current->thread.fpu);
+		last_task_used_math = current;
+	}
+#else
+	if (!test_tsk_fpu(task_pt_regs(current))) {
+		load_fpu(&current->thread.fpu);
+	}
+#endif
+	enable_ptreg_fpu(task_pt_regs(current));
+	preempt_enable();
+}
+
+#if !IS_ENABLED(CONFIG_LAZY_FPU)
+static inline void unlazy_fpu(struct task_struct *tsk)
+{
+	preempt_disable();
+	if (test_tsk_fpu(task_pt_regs(tsk)))
+		save_fpu(tsk);
+	preempt_enable();
+}
+#endif /* !CONFIG_LAZY_FPU */
+static inline void clear_fpu(struct pt_regs *regs)
+{
+	preempt_disable();
+	if (test_tsk_fpu(regs))
+		disable_ptreg_fpu(regs);
+	preempt_enable();
+}
+#endif /* CONFIG_FPU */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_NDS32_FPU_H */
diff --git a/arch/nds32/include/asm/fpuemu.h b/arch/nds32/include/asm/fpuemu.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __ARCH_NDS32_FPUEMU_H
+#define __ARCH_NDS32_FPUEMU_H
+
+/*
+ * single precision
+ */
+
+void fadds(void *ft, void *fa, void *fb);
+void fsubs(void *ft, void *fa, void *fb);
+void fmuls(void *ft, void *fa, void *fb);
+void fdivs(void *ft, void *fa, void *fb);
+void fs2d(void *ft, void *fa);
+void fsqrts(void *ft, void *fa);
+void fnegs(void *ft, void *fa);
+int fcmps(void *ft, void *fa, void *fb, int cop);
+
+/*
+ * double precision
+ */
+void faddd(void *ft, void *fa, void *fb);
+void fsubd(void *ft, void *fa, void *fb);
+void fmuld(void *ft, void *fa, void *fb);
+void fdivd(void *ft, void *fa, void *fb);
+void fsqrtd(void *ft, void *fa);
+void fd2s(void *ft, void *fa);
+void fnegd(void *ft, void *fa);
+int fcmpd(void *ft, void *fa, void *fb, int cop);
+
+#endif /* __ARCH_NDS32_FPUEMU_H */