Merge patch series "riscv: tlb flush improvements"
Alexandre Ghiti <[email protected]> says:

This series optimizes the tlb flushes on riscv, which used to simply
flush the whole tlb regardless of the size of the range to flush or the
size of the stride.

Patch 3 introduces a threshold that is microarchitecture specific and
will very likely be modified by vendors; it is not clear yet which
mechanism we'll use to do that (DT? alternatives? vendor initialization code?).

* b4-shazam-merge:
  riscv: Improve flush_tlb_kernel_range()
  riscv: Make __flush_tlb_range() loop over pte instead of flushing the whole tlb
  riscv: Improve flush_tlb_range() for hugetlb pages
  riscv: Improve tlb_flush()

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Palmer Dabbelt <[email protected]>
palmer-dabbelt committed Nov 6, 2023
2 parents dbfbda3 + 5e22bfd commit 9ba91d1
Showing 5 changed files with 144 additions and 95 deletions.
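
For orientation, below is a minimal, self-contained sketch of the threshold heuristic described in the commit message: flush page by page while the number of entries to invalidate stays small, and fall back to a full TLB flush once it exceeds a per-microarchitecture threshold (the series starts it at 64). The function names and printf stand-ins are illustrative assumptions, not kernel APIs; the real implementation is in arch/riscv/mm/tlbflush.c in the diff below.

#include <stdio.h>

/* Illustrative threshold; the series starts at 64 and expects vendors to tune it. */
static unsigned long tlb_flush_all_threshold = 64;

static void flush_one_page(unsigned long addr)
{
        printf("sfence.vma %#lx\n", addr);          /* stand-in for a single-page flush */
}

static void flush_all(void)
{
        printf("sfence.vma (full TLB flush)\n");    /* stand-in for a whole-TLB flush */
}

/* Flush [start, start + size) using the given stride (base-page or hugepage size). */
static void flush_range(unsigned long start, unsigned long size, unsigned long stride)
{
        unsigned long nr = (size + stride - 1) / stride;   /* DIV_ROUND_UP(size, stride) */
        unsigned long i;

        if (nr > tlb_flush_all_threshold) {
                flush_all();                        /* too many entries: one full flush is cheaper */
                return;
        }

        for (i = 0; i < nr; i++)
                flush_one_page(start + i * stride); /* small range: targeted flushes only */
}

int main(void)
{
        flush_range(0x10000, 4 * 4096, 4096);       /* 4 pages    -> 4 targeted flushes */
        flush_range(0x10000, 1024 * 4096, 4096);    /* 1024 pages -> one full flush */
        return 0;
}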
3 changes: 0 additions & 3 deletions arch/riscv/include/asm/sbi.h
@@ -273,9 +273,6 @@ void sbi_set_timer(uint64_t stime_value);
void sbi_shutdown(void);
void sbi_send_ipi(unsigned int cpu);
int sbi_remote_fence_i(const struct cpumask *cpu_mask);
int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);

int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
unsigned long start,
8 changes: 7 additions & 1 deletion arch/riscv/include/asm/tlb.h
@@ -15,7 +15,13 @@ static void tlb_flush(struct mmu_gather *tlb);

static inline void tlb_flush(struct mmu_gather *tlb)
{
flush_tlb_mm(tlb->mm);
#ifdef CONFIG_MMU
if (tlb->fullmm || tlb->need_flush_all)
flush_tlb_mm(tlb->mm);
else
flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end,
tlb_get_unmap_size(tlb));
#endif
}

#endif /* _ASM_RISCV_TLB_H */
15 changes: 11 additions & 4 deletions arch/riscv/include/asm/tlbflush.h
@@ -11,6 +11,9 @@
#include <asm/smp.h>
#include <asm/errata_list.h>

#define FLUSH_TLB_MAX_SIZE ((unsigned long)-1)
#define FLUSH_TLB_NO_ASID ((unsigned long)-1)

#ifdef CONFIG_MMU
extern unsigned long asid_mask;

@@ -32,9 +35,12 @@ static inline void local_flush_tlb_page(unsigned long addr)
#if defined(CONFIG_SMP) && defined(CONFIG_MMU)
void flush_tlb_all(void);
void flush_tlb_mm(struct mm_struct *mm);
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned int page_size);
void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
void flush_tlb_kernel_range(unsigned long start, unsigned long end);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
@@ -51,14 +57,15 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
local_flush_tlb_all();
}

#define flush_tlb_mm(mm) flush_tlb_all()
#endif /* !CONFIG_SMP || !CONFIG_MMU */

/* Flush a range of kernel pages */
static inline void flush_tlb_kernel_range(unsigned long start,
unsigned long end)
{
flush_tlb_all();
local_flush_tlb_all();
}

#define flush_tlb_mm(mm) flush_tlb_all()
#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
#endif /* !CONFIG_SMP || !CONFIG_MMU */

#endif /* _ASM_RISCV_TLBFLUSH_H */
32 changes: 10 additions & 22 deletions arch/riscv/kernel/sbi.c
@@ -11,6 +11,7 @@
#include <linux/reboot.h>
#include <asm/sbi.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>

/* default SBI version is 0.1 */
unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
@@ -376,32 +377,15 @@ int sbi_remote_fence_i(const struct cpumask *cpu_mask)
}
EXPORT_SYMBOL(sbi_remote_fence_i);

/**
* sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote
* harts for the specified virtual address range.
* @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the virtual address
* @size: Total size of the virtual address range.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma);

/**
* sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given
* remote harts for a virtual address range belonging to a specific ASID.
* remote harts for a virtual address range belonging to a specific ASID or not.
*
* @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the virtual address
* @size: Total size of the virtual address range.
* @asid: The value of address space identifier (ASID).
* @asid: The value of address space identifier (ASID), or FLUSH_TLB_NO_ASID
* for flushing all address spaces.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
@@ -410,8 +394,12 @@ int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
unsigned long size,
unsigned long asid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
cpu_mask, start, size, asid, 0);
if (asid == FLUSH_TLB_NO_ASID)
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
cpu_mask, start, size, 0, 0);
else
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
cpu_mask, start, size, asid, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);

181 changes: 116 additions & 65 deletions arch/riscv/mm/tlbflush.c
@@ -3,42 +3,67 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>
#include <asm/sbi.h>
#include <asm/mmu_context.h>

static inline void local_flush_tlb_all_asid(unsigned long asid)
{
__asm__ __volatile__ ("sfence.vma x0, %0"
:
: "r" (asid)
: "memory");
if (asid != FLUSH_TLB_NO_ASID)
__asm__ __volatile__ ("sfence.vma x0, %0"
:
: "r" (asid)
: "memory");
else
local_flush_tlb_all();
}

static inline void local_flush_tlb_page_asid(unsigned long addr,
unsigned long asid)
{
__asm__ __volatile__ ("sfence.vma %0, %1"
:
: "r" (addr), "r" (asid)
: "memory");
if (asid != FLUSH_TLB_NO_ASID)
__asm__ __volatile__ ("sfence.vma %0, %1"
:
: "r" (addr), "r" (asid)
: "memory");
else
local_flush_tlb_page(addr);
}

static inline void local_flush_tlb_range(unsigned long start,
unsigned long size, unsigned long stride)
/*
* Flush entire TLB if number of entries to be flushed is greater
* than the threshold below.
*/
static unsigned long tlb_flush_all_threshold __read_mostly = 64;

static void local_flush_tlb_range_threshold_asid(unsigned long start,
unsigned long size,
unsigned long stride,
unsigned long asid)
{
if (size <= stride)
local_flush_tlb_page(start);
else
local_flush_tlb_all();
unsigned long nr_ptes_in_range = DIV_ROUND_UP(size, stride);
int i;

if (nr_ptes_in_range > tlb_flush_all_threshold) {
local_flush_tlb_all_asid(asid);
return;
}

for (i = 0; i < nr_ptes_in_range; ++i) {
local_flush_tlb_page_asid(start, asid);
start += stride;
}
}

static inline void local_flush_tlb_range_asid(unsigned long start,
unsigned long size, unsigned long stride, unsigned long asid)
{
if (size <= stride)
local_flush_tlb_page_asid(start, asid);
else
else if (size == FLUSH_TLB_MAX_SIZE)
local_flush_tlb_all_asid(asid);
else
local_flush_tlb_range_threshold_asid(start, size, stride, asid);
}

static void __ipi_flush_tlb_all(void *info)
@@ -51,7 +76,7 @@ void flush_tlb_all(void)
if (riscv_use_ipi_for_rfence())
on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
else
sbi_remote_sfence_vma(NULL, 0, -1);
sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID);
}

struct flush_tlb_range_data {
@@ -68,68 +93,62 @@ static void __ipi_flush_tlb_range_asid(void *info)
local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid);
}

static void __ipi_flush_tlb_range(void *info)
{
struct flush_tlb_range_data *d = info;

local_flush_tlb_range(d->start, d->size, d->stride);
}

static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
unsigned long size, unsigned long stride)
{
struct flush_tlb_range_data ftd;
struct cpumask *cmask = mm_cpumask(mm);
unsigned int cpuid;
const struct cpumask *cmask;
unsigned long asid = FLUSH_TLB_NO_ASID;
bool broadcast;

if (cpumask_empty(cmask))
return;
if (mm) {
unsigned int cpuid;

cpuid = get_cpu();
/* check if the tlbflush needs to be sent to other CPUs */
broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
if (static_branch_unlikely(&use_asid_allocator)) {
unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;

if (broadcast) {
if (riscv_use_ipi_for_rfence()) {
ftd.asid = asid;
ftd.start = start;
ftd.size = size;
ftd.stride = stride;
on_each_cpu_mask(cmask,
__ipi_flush_tlb_range_asid,
&ftd, 1);
} else
sbi_remote_sfence_vma_asid(cmask,
start, size, asid);
} else {
local_flush_tlb_range_asid(start, size, stride, asid);
}
cmask = mm_cpumask(mm);
if (cpumask_empty(cmask))
return;

cpuid = get_cpu();
/* check if the tlbflush needs to be sent to other CPUs */
broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;

if (static_branch_unlikely(&use_asid_allocator))
asid = atomic_long_read(&mm->context.id) & asid_mask;
} else {
if (broadcast) {
if (riscv_use_ipi_for_rfence()) {
ftd.asid = 0;
ftd.start = start;
ftd.size = size;
ftd.stride = stride;
on_each_cpu_mask(cmask,
__ipi_flush_tlb_range,
&ftd, 1);
} else
sbi_remote_sfence_vma(cmask, start, size);
} else {
local_flush_tlb_range(start, size, stride);
}
cmask = cpu_online_mask;
broadcast = true;
}

put_cpu();
if (broadcast) {
if (riscv_use_ipi_for_rfence()) {
ftd.asid = asid;
ftd.start = start;
ftd.size = size;
ftd.stride = stride;
on_each_cpu_mask(cmask,
__ipi_flush_tlb_range_asid,
&ftd, 1);
} else
sbi_remote_sfence_vma_asid(cmask,
start, size, asid);
} else {
local_flush_tlb_range_asid(start, size, stride, asid);
}

if (mm)
put_cpu();
}

void flush_tlb_mm(struct mm_struct *mm)
{
__flush_tlb_range(mm, 0, -1, PAGE_SIZE);
__flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
}

void flush_tlb_mm_range(struct mm_struct *mm,
unsigned long start, unsigned long end,
unsigned int page_size)
{
__flush_tlb_range(mm, start, end - start, page_size);
}

void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
@@ -140,8 +159,40 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
__flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE);
unsigned long stride_size;

if (!is_vm_hugetlb_page(vma)) {
stride_size = PAGE_SIZE;
} else {
stride_size = huge_page_size(hstate_vma(vma));

/*
* As stated in the privileged specification, every PTE in a
* NAPOT region must be invalidated, so reset the stride in that
* case.
*/
if (has_svnapot()) {
if (stride_size >= PGDIR_SIZE)
stride_size = PGDIR_SIZE;
else if (stride_size >= P4D_SIZE)
stride_size = P4D_SIZE;
else if (stride_size >= PUD_SIZE)
stride_size = PUD_SIZE;
else if (stride_size >= PMD_SIZE)
stride_size = PMD_SIZE;
else
stride_size = PAGE_SIZE;
}
}

__flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
__flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
