Skip to content

Commit

Permalink
Merge tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linu…
Browse files Browse the repository at this point in the history
…x/kernel/git/greentime/linux

Pull nds32 updates from Greentime Hu:

 - Perf support

 - Power management support

 - FPU support

 - Hardware prefetcher support

 - Build error fixed

 - Performance enhancement

* tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux:
  nds32: support hardware prefetcher
  nds32: Fix the items of hwcap_str ordering issue.
  math-emu/soft-fp.h: (_FP_ROUND_ZERO) cast 0 to void to fix warning
  math-emu/op-2.h: Use statement expressions to prevent negative constant shift
  nds32: support denormalized result through FP emulator
  nds32: Support FP emulation
  nds32: nds32 FPU port
  nds32: Remove duplicated include from pm.c
  nds32: Power management for nds32
  nds32: Add document for NDS32 PMU.
  nds32: Add perf call-graph support.
  nds32: Perf porting
  nds32: Fix bug in bitfield.h
  nds32: Fix gcc 8.0 compiler option incompatible.
  nds32: Fill all TLB entries with kernel image mapping
  nds32: Remove the redundant assignment
  • Loading branch information
torvalds committed Dec 29, 2018
2 parents 903b77c + e2f3f8b commit 889bb74
Show file tree
Hide file tree
Showing 65 changed files with 4,441 additions and 89 deletions.
17 changes: 17 additions & 0 deletions Documentation/devicetree/bindings/perf/nds32v3-pmu.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
* NDS32 Performance Monitor Units

NDS32 cores have a PMU for counting CPU and cache events such as cache misses.
The NDS32 PMU should be represented in the device tree as follows:

Required properties:

- compatible :
"andestech,nds32v3-pmu"

- interrupts : The interrupt number for NDS32 PMU is 13.

Example:
pmu{
compatible = "andestech,nds32v3-pmu";
interrupts = <13>;
};
12 changes: 12 additions & 0 deletions arch/nds32/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ config NDS32
select HANDLE_DOMAIN_IRQ
select HAVE_ARCH_TRACEHOOK
select HAVE_DEBUG_KMEMLEAK
select HAVE_EXIT_THREAD
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_PERF_EVENTS
select IRQ_DOMAIN
select LOCKDEP_SUPPORT
select MODULES_USE_ELF_RELA
Expand Down Expand Up @@ -91,3 +93,13 @@ endmenu
menu "Kernel Features"
source "kernel/Kconfig.hz"
endmenu

menu "Power management options"
config SYS_SUPPORTS_APM_EMULATION
bool

config ARCH_SUSPEND_POSSIBLE
def_bool y

source "kernel/power/Kconfig"
endmenu
41 changes: 41 additions & 0 deletions arch/nds32/Kconfig.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,40 @@ config CPU_LITTLE_ENDIAN
bool "Little endian"
default y

config FPU
bool "FPU support"
default n
help
If FPU ISA is used in user space, this configuration shall be Y to
enable the required support in the kernel, such as FPU context switch
and the FPU exception handler.

If no FPU ISA is used in user space, say N.

config LAZY_FPU
bool "lazy FPU support"
depends on FPU
default y
help
Say Y here to enable the lazy FPU scheme. The lazy FPU scheme can
enhance system performance by reducing the context switch
frequency of the FPU register.

For the normal case, say Y.

config SUPPORT_DENORMAL_ARITHMETIC
bool "Denormal arithmetic support"
depends on FPU
default n
help
Say Y here to enable arithmetic on denormalized numbers. Enabling
this feature can enhance the precision for tiny numbers.
However, the performance loss in floating-point calculations is
possibly significant due to additional FPU exceptions.

If the calculated tolerance for tininess number is not critical,
say N to prevent performance loss.

config HWZOL
bool "hardware zero overhead loop support"
depends on CPU_D10 || CPU_D15
Expand Down Expand Up @@ -143,6 +177,13 @@ config CACHE_L2
Say Y here to enable L2 cache if your SoC are integrated with L2CC.
If unsure, say N.

config HW_PRE
bool "Enable hardware prefetcher"
default y
help
Say Y here to enable the hardware prefetcher feature.
It is only supported when CPU_VER.REV >= 0x09.

menu "Memory configuration"

choice
Expand Down
5 changes: 5 additions & 0 deletions arch/nds32/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@ KBUILD_DEFCONFIG := defconfig

comma = ,


ifdef CONFIG_FUNCTION_TRACER
arch-y += -malways-save-lp -mno-relax
endif

# Avoid generating FPU instructions
arch-y += -mno-ext-fpu-sp -mno-ext-fpu-dp -mfloat-abi=soft

KBUILD_CFLAGS += $(call cc-option, -mno-sched-prolog-epilog)
KBUILD_CFLAGS += -mcmodel=large

Expand All @@ -26,6 +30,7 @@ export TEXTADDR

# If we have a machine-specific directory, then include it in the build.
core-y += arch/nds32/kernel/ arch/nds32/mm/
core-$(CONFIG_FPU) += arch/nds32/math-emu/
libs-y += arch/nds32/lib/

ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""'
Expand Down
5 changes: 5 additions & 0 deletions arch/nds32/boot/dts/ae3xx.dts
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,9 @@
interrupts = <18>;
};
};

pmu {
compatible = "andestech,nds32v3-pmu";
interrupts= <13>;
};
};
1 change: 1 addition & 0 deletions arch/nds32/include/asm/Kbuild
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ generic-y += kprobes.h
generic-y += kvm_para.h
generic-y += limits.h
generic-y += local.h
generic-y += local64.h
generic-y += mm-arch-hooks.h
generic-y += mman.h
generic-y += parport.h
Expand Down
25 changes: 23 additions & 2 deletions arch/nds32/include/asm/bitfield.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,11 @@
#define ITYPE_mskSTYPE ( 0xF << ITYPE_offSTYPE )
#define ITYPE_mskCPID ( 0x3 << ITYPE_offCPID )

/* Additional definitions of ITYPE register for FPU */
#define FPU_DISABLE_EXCEPTION (0x1 << ITYPE_offSTYPE)
#define FPU_EXCEPTION (0x2 << ITYPE_offSTYPE)
#define FPU_CPID 0 /* FPU Co-Processor ID is 0 */

#define NDS32_VECTOR_mskNONEXCEPTION 0x78
#define NDS32_VECTOR_offEXCEPTION 8
#define NDS32_VECTOR_offINTERRUPT 9
Expand Down Expand Up @@ -692,8 +697,8 @@
#define PFM_CTL_offKU1 13 /* Enable user mode event counting for PFMC1 */
#define PFM_CTL_offKU2 14 /* Enable user mode event counting for PFMC2 */
#define PFM_CTL_offSEL0 15 /* The event selection for PFMC0 */
#define PFM_CTL_offSEL1 21 /* The event selection for PFMC1 */
#define PFM_CTL_offSEL2 27 /* The event selection for PFMC2 */
#define PFM_CTL_offSEL1 16 /* The event selection for PFMC1 */
#define PFM_CTL_offSEL2 22 /* The event selection for PFMC2 */
/* bit 28:31 reserved */

#define PFM_CTL_mskEN0 ( 0x01 << PFM_CTL_offEN0 )
Expand Down Expand Up @@ -735,14 +740,20 @@
#define N13MISC_CTL_offRTP 1 /* Disable Return Target Predictor */
#define N13MISC_CTL_offPTEPF 2 /* Disable HPTWK L2 PTE prefetch */
#define N13MISC_CTL_offSP_SHADOW_EN 4 /* Enable shadow stack pointers */
#define MISC_CTL_offHWPRE 11 /* Enable HardWare PREFETCH */
/* bit 6, 9:31 reserved (NOTE(review): bit 11 is HWPRE above, so this range looks stale -- confirm against the register spec) */

#define N13MISC_CTL_makBTB ( 0x1 << N13MISC_CTL_offBTB )
#define N13MISC_CTL_makRTP ( 0x1 << N13MISC_CTL_offRTP )
#define N13MISC_CTL_makPTEPF ( 0x1 << N13MISC_CTL_offPTEPF )
#define N13MISC_CTL_makSP_SHADOW_EN ( 0x1 << N13MISC_CTL_offSP_SHADOW_EN )
#define MISC_CTL_makHWPRE_EN ( 0x1 << MISC_CTL_offHWPRE )

#ifdef CONFIG_HW_PRE
#define MISC_init (N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN|MISC_CTL_makHWPRE_EN)
#else
#define MISC_init (N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN)
#endif

/******************************************************************************
* PRUSR_ACC_CTL (Privileged Resource User Access Control Registers)
Expand Down Expand Up @@ -926,6 +937,7 @@
#define FPCSR_mskDNIT ( 0x1 << FPCSR_offDNIT )
#define FPCSR_mskRIT ( 0x1 << FPCSR_offRIT )
#define FPCSR_mskALL (FPCSR_mskIVO | FPCSR_mskDBZ | FPCSR_mskOVF | FPCSR_mskUDF | FPCSR_mskIEX)
#define FPCSR_mskALLE_NO_UDFE (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskIEXE)
#define FPCSR_mskALLE (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskUDFE | FPCSR_mskIEXE)
#define FPCSR_mskALLT (FPCSR_mskIVOT | FPCSR_mskDBZT | FPCSR_mskOVFT | FPCSR_mskUDFT | FPCSR_mskIEXT |FPCSR_mskDNIT | FPCSR_mskRIT)

Expand All @@ -946,6 +958,15 @@
#define FPCFG_mskIMVER ( 0x1F << FPCFG_offIMVER )
#define FPCFG_mskAVER ( 0x1F << FPCFG_offAVER )

/* 8 Single precision or 4 double precision registers are available */
#define SP8_DP4_reg 0
/* 16 Single precision or 8 double precision registers are available */
#define SP16_DP8_reg 1
/* 32 Single precision or 16 double precision registers are available */
#define SP32_DP16_reg 2
/* 32 Single precision or 32 double precision registers are available */
#define SP32_DP32_reg 3

/******************************************************************************
* fucpr: FUCOP_CTL (FPU and Coprocessor Enable Control Register)
*****************************************************************************/
Expand Down
11 changes: 11 additions & 0 deletions arch/nds32/include/asm/elf.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
*/

#include <asm/ptrace.h>
#include <asm/fpu.h>

typedef unsigned long elf_greg_t;
typedef unsigned long elf_freg_t[3];
Expand Down Expand Up @@ -159,8 +160,18 @@ struct elf32_hdr;

#endif


/*
 * FPU_AUX_ENT is the first auxiliary-vector entry emitted by ARCH_DLINFO:
 * with CONFIG_FPU enabled it is an AT_FPUCW entry carrying FPCSR_INIT,
 * otherwise it is an AT_IGNORE entry with value 0.
 */
#if IS_ENABLED(CONFIG_FPU)
#define FPU_AUX_ENT NEW_AUX_ENT(AT_FPUCW, FPCSR_INIT)
#else
#define FPU_AUX_ENT NEW_AUX_ENT(AT_IGNORE, 0)
#endif

/*
 * Architecture-specific auxiliary-vector entries: FPU_AUX_ENT followed by
 * AT_SYSINFO_EHDR, whose value is current->mm->context.vdso.
 */
#define ARCH_DLINFO \
do { \
/* Optional FPU initialization */ \
FPU_AUX_ENT; \
\
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(elf_addr_t)current->mm->context.vdso); \
} while (0)
Expand Down
126 changes: 126 additions & 0 deletions arch/nds32/include/asm/fpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2005-2018 Andes Technology Corporation */

#ifndef __ASM_NDS32_FPU_H
#define __ASM_NDS32_FPU_H

#if IS_ENABLED(CONFIG_FPU)
#ifndef __ASSEMBLY__
#include <linux/sched/task_stack.h>
#include <linux/preempt.h>
#include <asm/ptrace.h>

extern bool has_fpu;

extern void save_fpu(struct task_struct *__tsk);
extern void load_fpu(const struct fpu_struct *fpregs);
extern bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs);
extern int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu);

/*
 * Evaluates to non-zero when the CP0EN bit (FUCOP_CTL_mskCP0EN) is set in
 * the fucop_ctl field of the pt_regs pointed to by @regs, zero otherwise.
 *
 * @regs is parenthesized so that any pointer expression (for example
 * "base + 1" or "&array[0]") expands with the intended precedence.
 */
#define test_tsk_fpu(regs) ((regs)->fucop_ctl & FUCOP_CTL_mskCP0EN)

/*
* Initially load the FPU with signalling NANS. This bit pattern
* has the property that no matter whether considered as single or as
* double precision, it still represents a signalling NAN.
*/

#define sNAN64 0xFFFFFFFFFFFFFFFFULL
#define sNAN32 0xFFFFFFFFUL

#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
/*
* Denormalized number is unsupported by nds32 FPU. Hence the operation
* is treated as underflow cases when the final result is a denormalized
* number. To enhance precision, underflow exception trap should be
* enabled by default and the kernel will re-execute it by the FPU emulator
* when getting underflow exception.
*/
#define FPCSR_INIT FPCSR_mskUDFE
#else
#define FPCSR_INIT 0x0UL
#endif

extern const struct fpu_struct init_fpuregs;

/* Clear the CP0EN bit in the fucop_ctl field of @regs. */
static inline void disable_ptreg_fpu(struct pt_regs *regs)
{
	regs->fucop_ctl = regs->fucop_ctl & ~FUCOP_CTL_mskCP0EN;
}

/* Set the CP0EN bit in the fucop_ctl field of @regs. */
static inline void enable_ptreg_fpu(struct pt_regs *regs)
{
	regs->fucop_ctl = regs->fucop_ctl | FUCOP_CTL_mskCP0EN;
}

/*
 * Read the FUCOP_CTL system register, set its CP0EN bit, write the value
 * back and then issue __nds32__isb().
 */
static inline void enable_fpu(void)
{
	unsigned long ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL);

	ctl |= FUCOP_CTL_mskCP0EN;
	__nds32__mtsr(ctl, NDS32_SR_FUCOP_CTL);
	__nds32__isb();
}

/*
 * Read the FUCOP_CTL system register, clear its CP0EN bit, write the value
 * back and then issue __nds32__isb().
 */
static inline void disable_fpu(void)
{
	unsigned long ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL);

	ctl &= ~FUCOP_CTL_mskCP0EN;
	__nds32__mtsr(ctl, NDS32_SR_FUCOP_CTL);
	__nds32__isb();
}

/*
 * Give up the FPU for the current task, with preemption disabled for the
 * duration.
 *
 * With CONFIG_LAZY_FPU: if current is last_task_used_math, that pointer is
 * reset to NULL and the FPU state is saved via save_fpu(current).
 * Without CONFIG_LAZY_FPU: the state is saved when CP0EN is set in the
 * current task's pt_regs.
 *
 * In both configurations CP0EN is cleared in the current task's pt_regs
 * afterwards.
 */
static inline void lose_fpu(void)
{
	int holds_fpu;

	preempt_disable();
#if IS_ENABLED(CONFIG_LAZY_FPU)
	holds_fpu = (last_task_used_math == current);
	if (holds_fpu)
		last_task_used_math = NULL;
#else
	holds_fpu = (test_tsk_fpu(task_pt_regs(current)) != 0);
#endif
	if (holds_fpu)
		save_fpu(current);
	disable_ptreg_fpu(task_pt_regs(current));
	preempt_enable();
}

/*
 * Take the FPU for the current task, with preemption disabled for the
 * duration.
 *
 * With CONFIG_LAZY_FPU: when current is not last_task_used_math, the state
 * of the previous user (if any) is saved, current's thread.fpu is loaded
 * and last_task_used_math is pointed at current.
 * Without CONFIG_LAZY_FPU: current's thread.fpu is loaded when CP0EN is
 * not yet set in its pt_regs.
 *
 * In both configurations CP0EN is set in the current task's pt_regs
 * afterwards.
 */
static inline void own_fpu(void)
{
	preempt_disable();
#if IS_ENABLED(CONFIG_LAZY_FPU)
	if (current != last_task_used_math) {
		if (last_task_used_math)
			save_fpu(last_task_used_math);
		load_fpu(&current->thread.fpu);
		last_task_used_math = current;
	}
#else
	if (test_tsk_fpu(task_pt_regs(current)) == 0)
		load_fpu(&current->thread.fpu);
#endif
	enable_ptreg_fpu(task_pt_regs(current));
	preempt_enable();
}

#if !IS_ENABLED(CONFIG_LAZY_FPU)
/*
 * Save the FPU state of @tsk via save_fpu() when CP0EN is set in its
 * pt_regs. Runs with preemption disabled. Only built when CONFIG_LAZY_FPU
 * is off.
 */
static inline void unlazy_fpu(struct task_struct *tsk)
{
	preempt_disable();
	if (test_tsk_fpu(task_pt_regs(tsk)) != 0) {
		save_fpu(tsk);
	}
	preempt_enable();
}
#endif /* !CONFIG_LAZY_FPU */
/*
 * Clear CP0EN in @regs if it is currently set. Runs with preemption
 * disabled; no FPU state is saved.
 */
static inline void clear_fpu(struct pt_regs *regs)
{
	preempt_disable();
	if (test_tsk_fpu(regs) != 0) {
		disable_ptreg_fpu(regs);
	}
	preempt_enable();
}
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_FPU */
#endif /* __ASM_NDS32_FPU_H */
32 changes: 32 additions & 0 deletions arch/nds32/include/asm/fpuemu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2005-2018 Andes Technology Corporation */

#ifndef __ARCH_NDS32_FPUEMU_H
#define __ARCH_NDS32_FPUEMU_H

/*
* single precision
*/

void fadds(void *ft, void *fa, void *fb);
void fsubs(void *ft, void *fa, void *fb);
void fmuls(void *ft, void *fa, void *fb);
void fdivs(void *ft, void *fa, void *fb);
void fs2d(void *ft, void *fa);
void fsqrts(void *ft, void *fa);
void fnegs(void *ft, void *fa);
int fcmps(void *ft, void *fa, void *fb, int cop);

/*
* double precision
*/
void faddd(void *ft, void *fa, void *fb);
void fsubd(void *ft, void *fa, void *fb);
void fmuld(void *ft, void *fa, void *fb);
void fdivd(void *ft, void *fa, void *fb);
void fsqrtd(void *ft, void *fa);
void fd2s(void *ft, void *fa);
void fnegd(void *ft, void *fa);
int fcmpd(void *ft, void *fa, void *fb, int cop);

#endif /* __ARCH_NDS32_FPUEMU_H */
Loading

0 comments on commit 889bb74

Please sign in to comment.