mm: make swapin readahead to improve thp collapse rate
This patch makes khugepaged perform swapin readahead to improve the THP
collapse rate.  When khugepaged scans pages, some of them may currently
reside in the swap area.

With the patch, khugepaged can collapse 4kB pages into a THP when there
are up to max_ptes_swap swap ptes in a 2MB range.
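
The max_ptes_swap limit is exposed through sysfs by the parent commit in
this series.  As a minimal, hedged sketch (not part of this patch; the
path is the one used by that series), the current limit can be read from
userspace like this:

/* Sketch: read the khugepaged max_ptes_swap knob via sysfs. */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_swap";
	FILE *f = fopen(path, "r");
	unsigned long val;

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%lu", &val) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("khugepaged tolerates up to %lu swap ptes per 2MB range\n", val);
	return 0;
}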

The patch was tested with a test program that allocates 400MB of memory,
writes to it, and then sleeps.  I force the system to swap all of it out.
Afterwards, the test program touches the area by writing again, skipping
one page in each 20 pages of the area.
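
The test program itself is not in the tree; the following is a minimal
sketch of the procedure described above (the size, pause points, and skip
pattern are illustrative assumptions, not the author's actual program):

/* Sketch of the test: populate, wait for swap-out, re-touch sparsely. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define SIZE	(400UL << 20)	/* 400MB */
#define PAGE	4096UL

int main(void)
{
	char *area = malloc(SIZE);
	unsigned long i;

	if (!area)
		return 1;
	memset(area, 1, SIZE);		/* first write: populate the area */
	puts("populated; force swap-out now, then press Enter");
	getchar();			/* stand in for the sleep phase */

	for (i = 0; i < SIZE / PAGE; i++) {
		if (i % 20 == 19)	/* skip one page in each 20 */
			continue;
		area[i * PAGE] = 2;	/* second write: fault pages back in */
	}
	pause();	/* give khugepaged time to collapse the range */
	return 0;
}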

Without the patch, the system performed no swapin readahead.  THPs covered
65% of the program's memory, and this did not change over time.

With this patch, after 10 minutes of waiting, khugepaged had collapsed
99% of the program's memory.

[[email protected]: trivial cleanup of exit path of the function]
[[email protected]: __collapse_huge_page_swapin(): drop unused 'pte' parameter]
[[email protected]: do not hold anon_vma lock during swap in]
Signed-off-by: Ebru Akagunduz <[email protected]>
Acked-by: Rik van Riel <[email protected]>
Cc: Naoya Horiguchi <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Xie XiuQi <[email protected]>
Cc: Cyrill Gorcunov <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Aneesh Kumar K.V <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Michal Hocko <[email protected]>
Signed-off-by: Kirill A. Shutemov <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
ebruAkagunduz authored and torvalds committed Jul 26, 2016
1 parent 70652f6 commit 8a966ed
Showing 4 changed files with 69 additions and 4 deletions.
24 changes: 24 additions & 0 deletions include/trace/events/huge_memory.h
@@ -135,5 +135,29 @@ TRACE_EVENT(mm_collapse_huge_page_isolate,
		__print_symbolic(__entry->status, SCAN_STATUS))
);

TRACE_EVENT(mm_collapse_huge_page_swapin,

	TP_PROTO(struct mm_struct *mm, int swapped_in, int ret),

	TP_ARGS(mm, swapped_in, ret),

	TP_STRUCT__entry(
		__field(struct mm_struct *, mm)
		__field(int, swapped_in)
		__field(int, ret)
	),

	TP_fast_assign(
		__entry->mm = mm;
		__entry->swapped_in = swapped_in;
		__entry->ret = ret;
	),

	TP_printk("mm=%p, swapped_in=%d, ret=%d",
		__entry->mm,
		__entry->swapped_in,
		__entry->ret)
);

#endif /* __HUGE_MEMORY_H */
#include <trace/define_trace.h>
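
Once the event is compiled in, it can be watched from userspace via
tracefs.  A rough sketch follows (it assumes tracefs is mounted at the
conventional /sys/kernel/debug/tracing; the event path follows from the
huge_memory trace system and the event name above):

/* Sketch: enable the new tracepoint and stream its output. */
#include <stdio.h>

#define TRACING "/sys/kernel/debug/tracing"

int main(void)
{
	FILE *en = fopen(TRACING
		"/events/huge_memory/mm_collapse_huge_page_swapin/enable", "w");
	FILE *pipe;
	int c;

	if (!en) {
		perror("enable");
		return 1;
	}
	fputs("1\n", en);
	fclose(en);

	pipe = fopen(TRACING "/trace_pipe", "r");  /* blocks until events arrive */
	if (!pipe) {
		perror("trace_pipe");
		return 1;
	}
	while ((c = fgetc(pipe)) != EOF)
		putchar(c);
	fclose(pipe);
	return 0;
}
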
43 changes: 40 additions & 3 deletions mm/huge_memory.c
@@ -2373,6 +2373,44 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
	return !(vma->vm_flags & VM_NO_THP);
}

/*
 * Bring missing pages in from swap, to complete THP collapse.
 * Only done if khugepaged_scan_pmd believes it is worthwhile.
 *
 * Called and returns without pte mapped or spinlocks held,
 * but with mmap_sem held to protect against vma changes.
 */

static void __collapse_huge_page_swapin(struct mm_struct *mm,
					struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmd)
{
	unsigned long _address;
	pte_t *pte, pteval;
	int swapped_in = 0, ret = 0;

	pte = pte_offset_map(pmd, address);
	for (_address = address; _address < address + HPAGE_PMD_NR*PAGE_SIZE;
	     pte++, _address += PAGE_SIZE) {
		pteval = *pte;
		if (!is_swap_pte(pteval))
			continue;
		swapped_in++;
		ret = do_swap_page(mm, vma, _address, pte, pmd,
				   FAULT_FLAG_ALLOW_RETRY|FAULT_FLAG_RETRY_NOWAIT,
				   pteval);
		if (ret & VM_FAULT_ERROR) {
			trace_mm_collapse_huge_page_swapin(mm, swapped_in, 0);
			return;
		}
		/* pte is unmapped now, we need to map it */
		pte = pte_offset_map(pmd, _address);
	}
	pte--;
	pte_unmap(pte);
	trace_mm_collapse_huge_page_swapin(mm, swapped_in, 1);
}

static void collapse_huge_page(struct mm_struct *mm,
				   unsigned long address,
				   struct page **hpage,
@@ -2440,6 +2478,8 @@ static void collapse_huge_page(struct mm_struct *mm,
		goto out;
	}

	__collapse_huge_page_swapin(mm, vma, address, pmd);

	anon_vma_lock_write(vma->anon_vma);

	pte = pte_offset_map(pmd, address);
@@ -2516,9 +2556,6 @@ static void collapse_huge_page(struct mm_struct *mm,
	result = SCAN_SUCCEED;
out_up_write:
	up_write(&mm->mmap_sem);
-	trace_mm_collapse_huge_page(mm, isolated, result);
-	return;
-
out_nolock:
	trace_mm_collapse_huge_page(mm, isolated, result);
	return;
4 changes: 4 additions & 0 deletions mm/internal.h
@@ -36,6 +36,10 @@
/* Do not use these with a slab allocator */
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)

extern int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, pte_t *page_table, pmd_t *pmd,
			unsigned int flags, pte_t orig_pte);

void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
		unsigned long floor, unsigned long ceiling);

2 changes: 1 addition & 1 deletion mm/memory.c
@@ -2522,7 +2522,7 @@ EXPORT_SYMBOL(unmap_mapping_range);
 * We return with the mmap_sem locked or unlocked in the same cases
 * as does filemap_fault().
 */
-static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
+int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		unsigned int flags, pte_t orig_pte)
{