mm/swap: split swap cache into 64MB trunks
This patch improves the scalability of swap out/in by using fine-grained
locks for the swap cache.  In the current kernel, one address space is
used for each swap device, and in common configurations the number of
swap devices is very small (one is typical).  This causes heavy lock
contention on the radix tree of the address space when multiple tasks
swap out/in concurrently.

In fact, there is no dependency between pages in the swap cache, so the
single address space shared per swap device can be split into several
address spaces to reduce the lock contention.  In this patch, the shared
address space is split into 64MB trunks.  64MB is chosen to balance
memory usage against the reduction in lock contention.
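
As a rough illustration of the trunk mapping (a stand-alone user-space
sketch, not part of the patch; it assumes 4KB pages, so one 64MB trunk
covers 2^14 = 16384 swap pages, which is where the shift of 14 in the
patch below comes from):

#include <stdio.h>

/* 64MB of swap with 4KB pages is 2^14 = 16384 pages per trunk. */
#define SWAP_ADDRESS_SPACE_SHIFT	14

int main(void)
{
	/* Hypothetical page offsets of three swap entries in one device. */
	unsigned long offsets[] = { 0, 16383, 16384 };

	for (int i = 0; i < 3; i++)
		printf("offset %lu -> swap address space #%lu\n",
		       offsets[i], offsets[i] >> SWAP_ADDRESS_SPACE_SHIFT);
	return 0;	/* prints address spaces 0, 0 and 1 */
}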

The size of struct address_space on the x86_64 architecture is 408B, so
with this patch 6528B of additional memory is used for every 1GB of swap
space on x86_64.
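
For reference, a quick back-of-the-envelope check of that figure (again
a user-space sketch, not part of the patch; the 408B struct size and 4KB
page size are taken from the text above):

#include <stdio.h>

#define DIV_ROUND_UP(n, d)		(((n) + (d) - 1) / (d))
#define SWAP_ADDRESS_SPACE_PAGES	16384UL		/* 64MB / 4KB */

int main(void)
{
	unsigned long swap_pages = (1UL << 30) / 4096;	/* 1GB of swap, 4KB pages */
	unsigned long nr = DIV_ROUND_UP(swap_pages, SWAP_ADDRESS_SPACE_PAGES);

	/* 16 address spaces * 408B each = 6528B for 1GB of swap */
	printf("%lu address spaces, %lu bytes of overhead\n", nr, nr * 408);
	return 0;
}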

One address space is still shared by all swap entries in the same 64MB
trunk.  To avoid lock contention during the first round of swap space
allocation, the order of the swap clusters in the initial free clusters
list is changed: the swap space distance between consecutive swap
clusters in the free cluster list is at least 64MB.  After the first
round of allocation, the swap clusters are expected to be freed in
random order, so the lock contention should be reduced effectively.
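
The reordering is implemented in the setup_swap_map_and_extents() hunk
below; the following stand-alone user-space sketch only mimics the
column interleave to show the resulting order (the cluster count and the
1MB cluster size are illustrative assumptions, and col stands in for the
first usable column):

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	/* Illustrative numbers: 256 clusters of 1MB, 64 clusters per 64MB trunk. */
	unsigned int nr_clusters = 256, cols = 64, col = 0;

	for (unsigned int k = 0; k < cols; k++) {
		unsigned int j = (k + col) % cols;

		for (unsigned int i = 0; i < DIV_ROUND_UP(nr_clusters, cols); i++) {
			unsigned int idx = i * cols + j;

			if (idx >= nr_clusters)
				continue;
			/*
			 * Prints 0 64 128 192 1 65 129 193 ...: adjacent
			 * entries are at least 64 clusters apart, i.e. at
			 * least one 64MB trunk with 1MB clusters.
			 */
			printf("%u ", idx);
		}
	}
	printf("\n");
	return 0;
}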

Link: http://lkml.kernel.org/r/735bab895e64c930581ffb0a05b661e01da82bc5.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: "Huang, Ying" <[email protected]>
Signed-off-by: Tim Chen <[email protected]>
Cc: Aaron Lu <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Hillf Danton <[email protected]>
Cc: Huang Ying <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Shaohua Li <[email protected]>
Cc: Vladimir Davydov <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
yhuang-intel authored and torvalds committed Feb 23, 2017
1 parent 235b621 commit 4b3ef9d
Showing 4 changed files with 79 additions and 22 deletions.
11 changes: 9 additions & 2 deletions include/linux/swap.h
@@ -343,8 +343,13 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *,
sector_t *);

/* linux/mm/swap_state.c */
extern struct address_space swapper_spaces[];
#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
/* One swap address space for each 64M swap space */
#define SWAP_ADDRESS_SPACE_SHIFT 14
#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT)
extern struct address_space *swapper_spaces[];
#define swap_address_space(entry) \
(&swapper_spaces[swp_type(entry)][swp_offset(entry) \
>> SWAP_ADDRESS_SPACE_SHIFT])
extern unsigned long total_swapcache_pages(void);
extern void show_swap_cache_info(void);
extern int add_to_swap(struct page *, struct list_head *list);
@@ -398,6 +403,8 @@ extern struct swap_info_struct *page_swap_info(struct page *);
extern bool reuse_swap_page(struct page *, int *);
extern int try_to_free_swap(struct page *);
struct backing_dev_info;
extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
extern void exit_swap_address_space(unsigned int type);

#else /* CONFIG_SWAP */

6 changes: 0 additions & 6 deletions mm/swap.c
@@ -971,12 +971,6 @@ EXPORT_SYMBOL(pagevec_lookup_tag);
void __init swap_setup(void)
{
unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
#ifdef CONFIG_SWAP
int i;

for (i = 0; i < MAX_SWAPFILES; i++)
spin_lock_init(&swapper_spaces[i].tree_lock);
#endif

/* Use a smaller cluster for small-memory machines */
if (megs < 16)
68 changes: 56 additions & 12 deletions mm/swap_state.c
@@ -17,6 +17,7 @@
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/migrate.h>
#include <linux/vmalloc.h>

#include <asm/pgtable.h>

@@ -32,15 +33,8 @@ static const struct address_space_operations swap_aops = {
#endif
};

struct address_space swapper_spaces[MAX_SWAPFILES] = {
[0 ... MAX_SWAPFILES - 1] = {
.page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
.i_mmap_writable = ATOMIC_INIT(0),
.a_ops = &swap_aops,
/* swap cache doesn't use writeback related tags */
.flags = 1 << AS_NO_WRITEBACK_TAGS,
}
};
struct address_space *swapper_spaces[MAX_SWAPFILES];
static unsigned int nr_swapper_spaces[MAX_SWAPFILES];

#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)

@@ -53,11 +47,26 @@ static struct {

unsigned long total_swapcache_pages(void)
{
int i;
unsigned int i, j, nr;
unsigned long ret = 0;
struct address_space *spaces;

for (i = 0; i < MAX_SWAPFILES; i++)
ret += swapper_spaces[i].nrpages;
rcu_read_lock();
for (i = 0; i < MAX_SWAPFILES; i++) {
/*
* The corresponding entries in nr_swapper_spaces and
* swapper_spaces will be reused only after at least
* one grace period. So it is impossible for them
* to belong to different usages.
*/
nr = nr_swapper_spaces[i];
spaces = rcu_dereference(swapper_spaces[i]);
if (!nr || !spaces)
continue;
for (j = 0; j < nr; j++)
ret += spaces[j].nrpages;
}
rcu_read_unlock();
return ret;
}

@@ -505,3 +514,38 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
skip:
return read_swap_cache_async(entry, gfp_mask, vma, addr);
}

int init_swap_address_space(unsigned int type, unsigned long nr_pages)
{
struct address_space *spaces, *space;
unsigned int i, nr;

nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
spaces = vzalloc(sizeof(struct address_space) * nr);
if (!spaces)
return -ENOMEM;
for (i = 0; i < nr; i++) {
space = spaces + i;
INIT_RADIX_TREE(&space->page_tree, GFP_ATOMIC|__GFP_NOWARN);
atomic_set(&space->i_mmap_writable, 0);
space->a_ops = &swap_aops;
/* swap cache doesn't use writeback related tags */
mapping_set_no_writeback_tags(space);
spin_lock_init(&space->tree_lock);
}
nr_swapper_spaces[type] = nr;
rcu_assign_pointer(swapper_spaces[type], spaces);

return 0;
}

void exit_swap_address_space(unsigned int type)
{
struct address_space *spaces;

spaces = swapper_spaces[type];
nr_swapper_spaces[type] = 0;
rcu_assign_pointer(swapper_spaces[type], NULL);
synchronize_rcu();
kvfree(spaces);
}
16 changes: 14 additions & 2 deletions mm/swapfile.c
@@ -2084,6 +2084,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
vfree(frontswap_map);
/* Destroy swap account information */
swap_cgroup_swapoff(p->type);
exit_swap_address_space(p->type);

inode = mapping->host;
if (S_ISBLK(inode->i_mode)) {
@@ -2407,8 +2408,12 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
return maxpages;
}

#define SWAP_CLUSTER_COLS \
#define SWAP_CLUSTER_INFO_COLS \
DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info))
#define SWAP_CLUSTER_SPACE_COLS \
DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER)
#define SWAP_CLUSTER_COLS \
max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS)

static int setup_swap_map_and_extents(struct swap_info_struct *p,
union swap_header *swap_header,
@@ -2471,7 +2476,10 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
return nr_extents;


/* Reduce false cache line sharing between cluster_info */
/*
* Reduce false cache line sharing between cluster_info entries,
* and avoid adjacent free clusters sharing the same swap cache
* address space.
*/
for (k = 0; k < SWAP_CLUSTER_COLS; k++) {
j = (k + col) % SWAP_CLUSTER_COLS;
for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
@@ -2661,6 +2669,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
}

error = init_swap_address_space(p->type, maxpages);
if (error)
goto bad_swap;

mutex_lock(&swapon_mutex);
prio = -1;
if (swap_flags & SWAP_FLAG_PREFER)
