Skip to content

Commit

Permalink
mm, swap: skip swapcache for swapin of synchronous device
Browse files Browse the repository at this point in the history
With fast swap storage, platforms want to use swap more aggressively,
and swap-in is crucial to application latency.

The rw_page() based synchronous devices like zram, pmem and btt are such
fast storage.  When I profiled swapin performance with a zram lz4
decompression test, S/W overhead was more than 70%.  It would probably
be even bigger on nvdimm.

This patch aims to reduce swap-in latency by skipping the swapcache if
the swap device is a synchronous device, such as an rw_page() based
device.  It improves my swapin test by 45% (5G sequential swapin, no
readahead, from 2.41sec to 1.64sec).

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Minchan Kim <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Ross Zwisler <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Ilya Dryomov <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Sergey Senozhatsky <[email protected]>
Cc: Huang Ying <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
minchank authored and torvalds committed Nov 16, 2017
1 parent 539a6fe commit 0bcac06
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 23 deletions.
11 changes: 11 additions & 0 deletions include/linux/swap.h
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,7 @@ extern int page_swapcount(struct page *);
extern int __swp_swapcount(swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry);
extern struct swap_info_struct *page_swap_info(struct page *);
extern struct swap_info_struct *swp_swap_info(swp_entry_t entry);
extern bool reuse_swap_page(struct page *, int *);
extern int try_to_free_swap(struct page *);
struct backing_dev_info;
Expand All @@ -474,6 +475,16 @@ extern void exit_swap_address_space(unsigned int type);

#else /* CONFIG_SWAP */

/*
 * Stub for the !CONFIG_SWAP branch: performs no I/O on @page, ignores
 * @do_poll, and always returns 0.
 */
static inline int swap_readpage(struct page *page, bool do_poll)
{
return 0;
}

/*
 * Stub for the !CONFIG_SWAP branch: @entry is ignored and NULL is always
 * returned.
 *
 * NOTE(review): do_swap_page() reads ->flags from the value returned by
 * swp_swap_info() without a NULL check; confirm that path cannot be
 * reached when this stub is in use.
 */
static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry)
{
return NULL;
}

#define swap_address_space(entry) (NULL)
#define get_nr_swap_pages() 0L
#define total_swap_pages 0L
Expand Down
52 changes: 36 additions & 16 deletions mm/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -2842,7 +2842,7 @@ EXPORT_SYMBOL(unmap_mapping_range);
int do_swap_page(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct page *page = NULL, *swapcache;
struct page *page = NULL, *swapcache = NULL;
struct mem_cgroup *memcg;
struct vma_swap_readahead swap_ra;
swp_entry_t entry;
Expand Down Expand Up @@ -2881,17 +2881,35 @@ int do_swap_page(struct vm_fault *vmf)
}
goto out;
}


delayacct_set_flag(DELAYACCT_PF_SWAPIN);
if (!page)
page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
vmf->address);
if (!page) {
if (vma_readahead)
page = do_swap_page_readahead(entry,
GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
else
page = swapin_readahead(entry,
GFP_HIGHUSER_MOVABLE, vma, vmf->address);
struct swap_info_struct *si = swp_swap_info(entry);

if (!(si->flags & SWP_SYNCHRONOUS_IO)) {
if (vma_readahead)
page = do_swap_page_readahead(entry,
GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
else
page = swapin_readahead(entry,
GFP_HIGHUSER_MOVABLE, vma, vmf->address);
swapcache = page;
} else {
/* skip swapcache */
page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
if (page) {
__SetPageLocked(page);
__SetPageSwapBacked(page);
set_page_private(page, entry.val);
lru_cache_add_anon(page);
swap_readpage(page, true);
}
}

if (!page) {
/*
* Back out if somebody else faulted in this pte
Expand Down Expand Up @@ -2920,7 +2938,6 @@ int do_swap_page(struct vm_fault *vmf)
goto out_release;
}

swapcache = page;
locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);

delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
Expand All @@ -2935,7 +2952,8 @@ int do_swap_page(struct vm_fault *vmf)
* test below, are not enough to exclude that. Even if it is still
* swapcache, we need to check that the page's swap has not changed.
*/
if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val))
if (unlikely((!PageSwapCache(page) ||
page_private(page) != entry.val)) && swapcache)
goto out_page;

page = ksm_might_need_to_copy(page, vma, vmf->address);
Expand Down Expand Up @@ -2988,22 +3006,24 @@ int do_swap_page(struct vm_fault *vmf)
pte = pte_mksoft_dirty(pte);
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
vmf->orig_pte = pte;
if (page == swapcache) {
do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
mem_cgroup_commit_charge(page, memcg, true, false);
activate_page(page);
} else { /* ksm created a completely new copy */

/* ksm created a completely new copy */
if (unlikely(page != swapcache && swapcache)) {
page_add_new_anon_rmap(page, vma, vmf->address, false);
mem_cgroup_commit_charge(page, memcg, false, false);
lru_cache_add_active_or_unevictable(page, vma);
} else {
do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
mem_cgroup_commit_charge(page, memcg, true, false);
activate_page(page);
}

swap_free(entry);
if (mem_cgroup_swap_full(page) ||
(vma->vm_flags & VM_LOCKED) || PageMlocked(page))
try_to_free_swap(page);
unlock_page(page);
if (page != swapcache) {
if (page != swapcache && swapcache) {
/*
* Hold the lock to avoid the swap entry to be reused
* until we take the PT lock for the pte_same() check
Expand Down Expand Up @@ -3036,7 +3056,7 @@ int do_swap_page(struct vm_fault *vmf)
unlock_page(page);
out_release:
put_page(page);
if (page != swapcache) {
if (page != swapcache && swapcache) {
unlock_page(swapcache);
put_page(swapcache);
}
Expand Down
6 changes: 3 additions & 3 deletions mm/page_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -347,15 +347,15 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
return ret;
}

int swap_readpage(struct page *page, bool do_poll)
int swap_readpage(struct page *page, bool synchronous)
{
struct bio *bio;
int ret = 0;
struct swap_info_struct *sis = page_swap_info(page);
blk_qc_t qc;
struct gendisk *disk;

VM_BUG_ON_PAGE(!PageSwapCache(page), page);
VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageUptodate(page), page);
if (frontswap_load(page) == 0) {
Expand Down Expand Up @@ -403,7 +403,7 @@ int swap_readpage(struct page *page, bool do_poll)
count_vm_event(PSWPIN);
bio_get(bio);
qc = submit_bio(bio);
while (do_poll) {
while (synchronous) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!READ_ONCE(bio->bi_private))
break;
Expand Down
11 changes: 7 additions & 4 deletions mm/swapfile.c
Original file line number Diff line number Diff line change
Expand Up @@ -3455,26 +3455,29 @@ int swapcache_prepare(swp_entry_t entry)
return __swap_duplicate(entry, SWAP_HAS_CACHE);
}

/*
 * Return the swap_info[] slot indexed by the swap type encoded in @entry.
 * No range or NULL checking is done on the index or on the returned
 * pointer here.
 */
struct swap_info_struct *swp_swap_info(swp_entry_t entry)
{
return swap_info[swp_type(entry)];
}

struct swap_info_struct *page_swap_info(struct page *page)
{
swp_entry_t swap = { .val = page_private(page) };
return swap_info[swp_type(swap)];
swp_entry_t entry = { .val = page_private(page) };
return swp_swap_info(entry);
}

/*
* out-of-line __page_file_ methods to avoid include hell.
*/
struct address_space *__page_file_mapping(struct page *page)
{
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
return page_swap_info(page)->swap_file->f_mapping;
}
EXPORT_SYMBOL_GPL(__page_file_mapping);

pgoff_t __page_file_index(struct page *page)
{
swp_entry_t swap = { .val = page_private(page) };
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
return swp_offset(swap);
}
EXPORT_SYMBOL_GPL(__page_file_index);
Expand Down

0 comments on commit 0bcac06

Please sign in to comment.