mm: make madvise(MADV_WILLNEED) support swap file prefetch
Make madvise(MADV_WILLNEED) support swap file prefetch.  If the memory has
been swapped out, this syscall does swap-in prefetch.  It has no impact if
the memory isn't swapped out.
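
As an illustration (not from the patch itself; the mapping size is an
arbitrary assumption), a minimal userspace sketch of how MADV_WILLNEED
would be used on anonymous memory that may have been swapped out:

#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 64UL << 20;	/* 64 MiB of anonymous memory, illustrative */
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED)
		return 1;
	memset(buf, 1, len);		/* populate; may be swapped out under memory pressure */

	/* With this patch, swapped-out pages in the range are read back
	 * asynchronously; previously MADV_WILLNEED on an anonymous mapping
	 * returned -EBADF. */
	madvise(buf, len, MADV_WILLNEED);

	/* Later accesses are then less likely to block on swap-in. */
	munmap(buf, len);
	return 0;
}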

[[email protected]: fix CONFIG_SWAP=n build]
[[email protected]: fix BUG on madvise early failure]
Signed-off-by: Shaohua Li <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Rik van Riel <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Shaohua Li authored and torvalds committed Feb 24, 2013
1 parent a394cb8 commit 1998cc0
Showing 1 changed file with 101 additions and 4 deletions.
105 changes: 101 additions & 4 deletions mm/madvise.c
@@ -16,6 +16,9 @@
#include <linux/ksm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/swapops.h>

/*
* Any behaviour which results in changes to the vma->vm_flags needs to
@@ -131,6 +134,84 @@ static long madvise_behavior(struct vm_area_struct * vma,
return error;
}

#ifdef CONFIG_SWAP
static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
unsigned long end, struct mm_walk *walk)
{
pte_t *orig_pte;
struct vm_area_struct *vma = walk->private;
unsigned long index;

if (pmd_none_or_trans_huge_or_clear_bad(pmd))
return 0;

for (index = start; index != end; index += PAGE_SIZE) {
pte_t pte;
swp_entry_t entry;
struct page *page;
spinlock_t *ptl;

orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
pte = *(orig_pte + ((index - start) / PAGE_SIZE));
pte_unmap_unlock(orig_pte, ptl);

if (pte_present(pte) || pte_none(pte) || pte_file(pte))
continue;
entry = pte_to_swp_entry(pte);
if (unlikely(non_swap_entry(entry)))
continue;

page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
vma, index);
if (page)
page_cache_release(page);
}

return 0;
}

static void force_swapin_readahead(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
struct mm_walk walk = {
.mm = vma->vm_mm,
.pmd_entry = swapin_walk_pmd_entry,
.private = vma,
};

walk_page_range(start, end, &walk);

lru_add_drain(); /* Push any new pages onto the LRU now */
}

static void force_shm_swapin_readahead(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct address_space *mapping)
{
pgoff_t index;
struct page *page;
swp_entry_t swap;

for (; start < end; start += PAGE_SIZE) {
index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;

page = find_get_page(mapping, index);
if (!radix_tree_exceptional_entry(page)) {
if (page)
page_cache_release(page);
continue;
}
swap = radix_to_swp_entry(page);
page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
NULL, 0);
if (page)
page_cache_release(page);
}

lru_add_drain(); /* Push any new pages onto the LRU now */
}
#endif /* CONFIG_SWAP */

/*
* Schedule all required I/O operations. Do not wait for completion.
*/
@@ -140,6 +221,18 @@ static long madvise_willneed(struct vm_area_struct * vma,
{
struct file *file = vma->vm_file;

#ifdef CONFIG_SWAP
if (!file || mapping_cap_swap_backed(file->f_mapping)) {
*prev = vma;
if (!file)
force_swapin_readahead(vma, start, end);
else
force_shm_swapin_readahead(vma, start, end,
file->f_mapping);
return 0;
}
#endif

if (!file)
return -EBADF;

@@ -371,6 +464,7 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
int error = -EINVAL;
int write;
size_t len;
struct blk_plug plug;

#ifdef CONFIG_MEMORY_FAILURE
if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
@@ -410,18 +504,19 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
if (vma && start > vma->vm_start)
prev = vma;

blk_start_plug(&plug);
for (;;) {
/* Still start < end. */
error = -ENOMEM;
if (!vma)
- goto out;
+ goto out_plug;

/* Here start < (end|vma->vm_end). */
if (start < vma->vm_start) {
unmapped_error = -ENOMEM;
start = vma->vm_start;
if (start >= end)
- goto out;
+ goto out_plug;
}

/* Here vma->vm_start <= start < (end|vma->vm_end) */
@@ -432,18 +527,20 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
error = madvise_vma(vma, &prev, start, tmp, behavior);
if (error)
- goto out;
+ goto out_plug;
start = tmp;
if (prev && start < prev->vm_end)
start = prev->vm_end;
error = unmapped_error;
if (start >= end)
- goto out;
+ goto out_plug;
if (prev)
vma = prev->vm_next;
else /* madvise_remove dropped mmap_sem */
vma = find_vma(current->mm, start);
}
out_plug:
blk_finish_plug(&plug);
out:
if (write)
up_write(&current->mm->mmap_sem);
