Skip to content

Commit

Permalink
mm: huge_memory: a new debugfs interface for splitting THP tests
Browse files Browse the repository at this point in the history
We did not have a direct user interface of splitting the compound page
backing a THP and there is no need unless we want to expose the THP
implementation details to users.  Make <debugfs>/split_huge_pages accept a
new command to do that.

By writing "<pid>,<vaddr_start>,<vaddr_end>" to
<debugfs>/split_huge_pages, THPs within the given virtual address range
from the process with the given pid are split. It is used to test
split_huge_page function. In addition, a selftest program is added to
tools/testing/selftests/vm to utilize the interface by splitting
PMD THPs and PTE-mapped THPs.

This does not change the old behavior, i.e., writing 1 to the interface
to split all THPs in the system.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Zi Yan <[email protected]>
Reviewed-by: Yang Shi <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: John Hubbard <[email protected]>
Cc: "Kirill A . Shutemov" <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Mika Penttila <[email protected]>
Cc: Sandipan Das <[email protected]>
Cc: Shuah Khan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
x-y-z authored and torvalds committed May 5, 2021
1 parent 75f8378 commit fa6c023
Show file tree
Hide file tree
Showing 4 changed files with 467 additions and 8 deletions.
155 changes: 147 additions & 8 deletions mm/huge_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/numa_balancing.h>
#include <linux/highmem.h>
Expand Down Expand Up @@ -2915,16 +2916,14 @@ static struct shrinker deferred_split_shrinker = {
};

#ifdef CONFIG_DEBUG_FS
static int split_huge_pages_set(void *data, u64 val)
static void split_huge_pages_all(void)
{
struct zone *zone;
struct page *page;
unsigned long pfn, max_zone_pfn;
unsigned long total = 0, split = 0;

if (val != 1)
return -EINVAL;

pr_debug("Split all THPs\n");
for_each_populated_zone(zone) {
max_zone_pfn = zone_end_pfn(zone);
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
Expand All @@ -2948,15 +2947,155 @@ static int split_huge_pages_set(void *data, u64 val)
unlock_page(page);
next:
put_page(page);
cond_resched();
}
}

pr_info("%lu of %lu THP split\n", split, total);
pr_debug("%lu of %lu THP split\n", split, total);
}

return 0;
static inline bool vma_not_suitable_for_thp_split(struct vm_area_struct *vma)
{
return vma_is_special_huge(vma) || (vma->vm_flags & VM_IO) ||
is_vm_hugetlb_page(vma);
}

static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
unsigned long vaddr_end)
{
int ret = 0;
struct task_struct *task;
struct mm_struct *mm;
unsigned long total = 0, split = 0;
unsigned long addr;

vaddr_start &= PAGE_MASK;
vaddr_end &= PAGE_MASK;

/* Find the task_struct from pid */
rcu_read_lock();
task = find_task_by_vpid(pid);
if (!task) {
rcu_read_unlock();
ret = -ESRCH;
goto out;
}
get_task_struct(task);
rcu_read_unlock();

/* Find the mm_struct */
mm = get_task_mm(task);
put_task_struct(task);

if (!mm) {
ret = -EINVAL;
goto out;
}

pr_debug("Split huge pages in pid: %d, vaddr: [0x%lx - 0x%lx]\n",
pid, vaddr_start, vaddr_end);

mmap_read_lock(mm);
/*
* always increase addr by PAGE_SIZE, since we could have a PTE page
* table filled with PTE-mapped THPs, each of which is distinct.
*/
for (addr = vaddr_start; addr < vaddr_end; addr += PAGE_SIZE) {
struct vm_area_struct *vma = find_vma(mm, addr);
unsigned int follflags;
struct page *page;

if (!vma || addr < vma->vm_start)
break;

/* skip special VMA and hugetlb VMA */
if (vma_not_suitable_for_thp_split(vma)) {
addr = vma->vm_end;
continue;
}

/* FOLL_DUMP to ignore special (like zero) pages */
follflags = FOLL_GET | FOLL_DUMP;
page = follow_page(vma, addr, follflags);

if (IS_ERR(page))
continue;
if (!page)
continue;

if (!is_transparent_hugepage(page))
goto next;

total++;
if (!can_split_huge_page(compound_head(page), NULL))
goto next;

if (!trylock_page(page))
goto next;

if (!split_huge_page(page))
split++;

unlock_page(page);
next:
put_page(page);
cond_resched();
}
mmap_read_unlock(mm);
mmput(mm);

pr_debug("%lu of %lu THP split\n", split, total);

out:
return ret;
}
DEFINE_DEBUGFS_ATTRIBUTE(split_huge_pages_fops, NULL, split_huge_pages_set,
"%llu\n");

#define MAX_INPUT_BUF_SZ 255

static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppops)
{
static DEFINE_MUTEX(split_debug_mutex);
ssize_t ret;
char input_buf[MAX_INPUT_BUF_SZ]; /* hold pid, start_vaddr, end_vaddr */
int pid;
unsigned long vaddr_start, vaddr_end;

ret = mutex_lock_interruptible(&split_debug_mutex);
if (ret)
return ret;

ret = -EFAULT;

memset(input_buf, 0, MAX_INPUT_BUF_SZ);
if (copy_from_user(input_buf, buf, min_t(size_t, count, MAX_INPUT_BUF_SZ)))
goto out;

input_buf[MAX_INPUT_BUF_SZ - 1] = '\0';
ret = sscanf(input_buf, "%d,0x%lx,0x%lx", &pid, &vaddr_start, &vaddr_end);
if (ret == 1 && pid == 1) {
split_huge_pages_all();
ret = strlen(input_buf);
goto out;
} else if (ret != 3) {
ret = -EINVAL;
goto out;
}

ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end);
if (!ret)
ret = strlen(input_buf);
out:
mutex_unlock(&split_debug_mutex);
return ret;

}

static const struct file_operations split_huge_pages_fops = {
.owner = THIS_MODULE,
.write = split_huge_pages_write,
.llseek = no_llseek,
};

static int __init split_huge_pages_debugfs(void)
{
Expand Down
1 change: 1 addition & 0 deletions tools/testing/selftests/vm/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ map_fixed_noreplace
write_to_hugetlbfs
hmm-tests
local_config.*
split_huge_page_test
1 change: 1 addition & 0 deletions tools/testing/selftests/vm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd
TEST_GEN_FILES += split_huge_page_test

ifeq ($(MACHINE),x86_64)
CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
Expand Down
Loading

0 comments on commit fa6c023

Please sign in to comment.