diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 0595585013b0..efd89a88d2d0 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -18,22 +18,44 @@ #undef flush_icache_range #undef flush_icache_user_range +#undef flush_dcache_page static inline void local_flush_icache_all(void) { asm volatile ("fence.i" ::: "memory"); } +#define PG_dcache_clean PG_arch_1 + +static inline void flush_dcache_page(struct page *page) +{ + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); +} + +/* + * RISC-V doesn't have an instruction to flush parts of the instruction cache, + * so instead we just flush the whole thing. + */ +#define flush_icache_range(start, end) flush_icache_all() +#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all() + #ifndef CONFIG_SMP -#define flush_icache_range(start, end) local_flush_icache_all() -#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all() +#define flush_icache_all() local_flush_icache_all() +#define flush_icache_mm(mm, local) flush_icache_all() #else /* CONFIG_SMP */ -#define flush_icache_range(start, end) sbi_remote_fence_i(0) -#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0) +#define flush_icache_all() sbi_remote_fence_i(0) +void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ +/* + * Bits in sys_riscv_flush_icache()'s flags argument. + */ +#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL +#define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL) + #endif /* _ASM_RISCV_CACHEFLUSH_H */ diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 66805cba9a27..5df2dccdba12 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -19,6 +19,10 @@ typedef struct { void *vdso; +#ifdef CONFIG_SMP + /* A local icache flush is needed before user execution can resume. */ + cpumask_t icache_stale_mask; +#endif } mm_context_t; #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index de1fc1631fc4..b15b169e3d22 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,6 +20,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *task) @@ -46,12 +48,54 @@ static inline void set_pgdir(pgd_t *pgd) csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE); } +/* + * When necessary, performs a deferred icache flush for the given MM context, + * on the local CPU. RISC-V has no direct mechanism for instruction cache + * shoot downs, so instead we send an IPI that informs the remote harts they + * need to flush their local instruction caches. To avoid pathologically slow + * behavior in a common case (a bunch of single-hart processes on a many-hart + * machine, ie 'make -j') we avoid the IPIs for harts that are not currently + * executing a MM context and instead schedule a deferred local instruction + * cache flush to be performed before execution resumes on each hart. This + * actually performs that local instruction cache flush, which implicitly only + * refers to the current hart. + */ +static inline void flush_icache_deferred(struct mm_struct *mm) +{ +#ifdef CONFIG_SMP + unsigned int cpu = smp_processor_id(); + cpumask_t *mask = &mm->context.icache_stale_mask; + + if (cpumask_test_cpu(cpu, mask)) { + cpumask_clear_cpu(cpu, mask); + /* + * Ensure the remote hart's writes are visible to this hart. + * This pairs with a barrier in flush_icache_mm. + */ + smp_mb(); + local_flush_icache_all(); + } +#endif +} + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *task) { if (likely(prev != next)) { + /* + * Mark the current MM context as inactive, and the next as + * active. This is at least used by the icache flushing + * routines in order to determine who should + */ + unsigned int cpu = smp_processor_id(); + + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); + set_pgdir(next->pgd); local_flush_tlb_all(); + + flush_icache_deferred(next); } } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 3399257780b2..2cbd92ed1629 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr) #define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) #define pte_unmap(pte) ((void)(pte)) -/* - * Certain architectures need to do special things when PTEs within - * a page table are directly modified. Thus, the following hook is - * made available. - */ -static inline void set_pte(pte_t *ptep, pte_t pteval) -{ - *ptep = pteval; -} - -static inline void set_pte_at(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, pte_t pteval) -{ - set_pte(ptep, pteval); -} - -static inline void pte_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - set_pte_at(mm, addr, ptep, __pte(0)); -} - static inline int pte_present(pte_t pte) { return (pte_val(pte) & _PAGE_PRESENT); @@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte) return (pte_val(pte) == 0); } -/* static inline int pte_read(pte_t pte) */ - static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } +static inline int pte_exec(pte_t pte) +{ + return pte_val(pte) & _PAGE_EXEC; +} + static inline int pte_huge(pte_t pte) { return pte_present(pte) && (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); } -/* static inline int pte_exec(pte_t pte) */ - static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; @@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) return pte_val(pte_a) == pte_val(pte_b); } +/* + * Certain architectures need to do special things when PTEs within + * a page table are directly modified. Thus, the following hook is + * made available. + */ +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; +} + +void flush_icache_pte(pte_t pte); + +static inline void set_pte_at(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pte_t pteval) +{ + if (pte_present(pteval) && pte_exec(pteval)) + flush_icache_pte(pteval); + + set_pte(ptep, pteval); +} + +static inline void pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + set_pte_at(mm, addr, ptep, __pte(0)); +} + #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS static inline int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index c79fab3d377d..715b0f10af58 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -17,6 +17,8 @@ #ifdef CONFIG_MMU +#include + /* * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction * cache as well, so a 'fence.i' is not necessary. diff --git a/arch/riscv/include/asm/vdso-syscalls.h b/arch/riscv/include/asm/vdso-syscalls.h new file mode 100644 index 000000000000..a2ccf1894929 --- /dev/null +++ b/arch/riscv/include/asm/vdso-syscalls.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _ASM_RISCV_VDSO_SYSCALLS_H +#define _ASM_RISCV_VDSO_SYSCALLS_H + +#ifdef CONFIG_SMP + +/* These syscalls are only used by the vDSO and are not in the uapi. */ +#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) +__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) + +#endif + +#endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 602f61257553..541544d64c33 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -38,4 +38,8 @@ struct vdso_data { (void __user *)((unsigned long)(base) + __vdso_##name); \ }) +#ifdef CONFIG_SMP +asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); +#endif + #endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index b4a71ec5906f..1b27ade437b4 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -108,3 +108,51 @@ void smp_send_reschedule(int cpu) { send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); } + +/* + * Performs an icache flush for the given MM context. RISC-V has no direct + * mechanism for instruction cache shoot downs, so instead we send an IPI that + * informs the remote harts they need to flush their local instruction caches. + * To avoid pathologically slow behavior in a common case (a bunch of + * single-hart processes on a many-hart machine, ie 'make -j') we avoid the + * IPIs for harts that are not currently executing a MM context and instead + * schedule a deferred local instruction cache flush to be performed before + * execution resumes on each hart. + */ +void flush_icache_mm(struct mm_struct *mm, bool local) +{ + unsigned int cpu; + cpumask_t others, *mask; + + preempt_disable(); + + /* Mark every hart's icache as needing a flush for this MM. */ + mask = &mm->context.icache_stale_mask; + cpumask_setall(mask); + /* Flush this hart's I$ now, and mark it as flushed. */ + cpu = smp_processor_id(); + cpumask_clear_cpu(cpu, mask); + local_flush_icache_all(); + + /* + * Flush the I$ of other harts concurrently executing, and mark them as + * flushed. + */ + cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu)); + local |= cpumask_empty(&others); + if (mm != current->active_mm || !local) + sbi_remote_fence_i(others.bits); + else { + /* + * It's assumed that at least one strongly ordered operation is + * performed on this hart between setting a hart's cpumask bit + * and scheduling this MM context on that hart. Sending an SBI + * remote message will do this, but in the case where no + * messages are sent we still need to order this hart's writes + * with flush_icache_deferred(). + */ + smp_mb(); + } + + preempt_enable(); +} diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 4351be7d0533..a2ae936a093e 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -14,8 +14,8 @@ */ #include -#include #include +#include static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -47,3 +47,34 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12); } #endif /* !CONFIG_64BIT */ + +#ifdef CONFIG_SMP +/* + * Allows the instruction cache to be flushed from userspace. Despite RISC-V + * having a direct 'fence.i' instruction available to userspace (which we + * can't trap!), that's not actually viable when running on Linux because the + * kernel might schedule a process on another hart. There is no way for + * userspace to handle this without invoking the kernel (as it doesn't know the + * thread->hart mappings), so we've defined a RISC-V specific system call to + * flush the instruction cache. + * + * sys_riscv_flush_icache() is defined to flush the instruction cache over an + * address range, with the flush applying to either all threads or just the + * caller. We don't currently do anything with the address range, that's just + * in there for forwards compatibility. + */ +SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end, + uintptr_t, flags) +{ + struct mm_struct *mm = current->mm; + bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0; + + /* Check the reserved flags. */ + if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL)) + return -EINVAL; + + flush_icache_mm(mm, local); + + return 0; +} +#endif diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index 4e30dc5fb593..a5bd6401f95e 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -15,6 +15,7 @@ #include #include #include +#include #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), @@ -22,4 +23,5 @@ void *sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = sys_ni_syscall, #include +#include }; diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 523d0a8ac8db..324568d33921 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -1,7 +1,12 @@ # Copied from arch/tile/kernel/vdso/Makefile # Symbols present in the vdso -vdso-syms = rt_sigreturn +vdso-syms = rt_sigreturn +vdso-syms += gettimeofday +vdso-syms += clock_gettime +vdso-syms += clock_getres +vdso-syms += getcpu +vdso-syms += flush_icache # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) diff --git a/arch/riscv/kernel/vdso/clock_getres.S b/arch/riscv/kernel/vdso/clock_getres.S new file mode 100644 index 000000000000..edf7e2339648 --- /dev/null +++ b/arch/riscv/kernel/vdso/clock_getres.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_clock_getres(clockid_t clock_id, struct timespec *res); */ +ENTRY(__vdso_clock_getres) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_clock_getres + ecall + ret + .cfi_endproc +ENDPROC(__vdso_clock_getres) diff --git a/arch/riscv/kernel/vdso/clock_gettime.S b/arch/riscv/kernel/vdso/clock_gettime.S new file mode 100644 index 000000000000..aac65676c6d5 --- /dev/null +++ b/arch/riscv/kernel/vdso/clock_gettime.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_clock_gettime(clockid_t clock_id, struct timespec *tp); */ +ENTRY(__vdso_clock_gettime) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_clock_gettime + ecall + ret + .cfi_endproc +ENDPROC(__vdso_clock_gettime) diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S new file mode 100644 index 000000000000..b0fbad74e873 --- /dev/null +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include + + .text +/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ +ENTRY(__vdso_flush_icache) + .cfi_startproc +#ifdef CONFIG_SMP + li a7, __NR_riscv_flush_icache + ecall +#else + fence.i + li a0, 0 +#endif + ret + .cfi_endproc +ENDPROC(__vdso_flush_icache) diff --git a/arch/riscv/kernel/vdso/getcpu.S b/arch/riscv/kernel/vdso/getcpu.S new file mode 100644 index 000000000000..cc7e98924484 --- /dev/null +++ b/arch/riscv/kernel/vdso/getcpu.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */ +ENTRY(__vdso_getcpu) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_getcpu + ecall + ret + .cfi_endproc +ENDPROC(__vdso_getcpu) diff --git a/arch/riscv/kernel/vdso/gettimeofday.S b/arch/riscv/kernel/vdso/gettimeofday.S new file mode 100644 index 000000000000..da85d33e8990 --- /dev/null +++ b/arch/riscv/kernel/vdso/gettimeofday.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); */ +ENTRY(__vdso_gettimeofday) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_gettimeofday + ecall + ret + .cfi_endproc +ENDPROC(__vdso_gettimeofday) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 8c9dce95c11d..cd1d47e0724b 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -70,8 +70,11 @@ VERSION LINUX_4.15 { global: __vdso_rt_sigreturn; - __vdso_cmpxchg32; - __vdso_cmpxchg64; + __vdso_gettimeofday; + __vdso_clock_gettime; + __vdso_clock_getres; + __vdso_getcpu; + __vdso_flush_icache; local: *; }; } diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 81f7d9ce6d88..eb22ab49b3e0 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -2,3 +2,4 @@ obj-y += init.o obj-y += fault.o obj-y += extable.o obj-y += ioremap.o +obj-y += cacheflush.o diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c new file mode 100644 index 000000000000..498c0a0814fe --- /dev/null +++ b/arch/riscv/mm/cacheflush.c @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + +void flush_icache_pte(pte_t pte) +{ + struct page *page = pte_page(pte); + + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + flush_icache_all(); +}