Skip to content

Commit

Permalink
mm + fs: prepare for non-page entries in page cache radix trees
Browse files Browse the repository at this point in the history
shmem mappings already contain exceptional entries where swap slot
information is remembered.

To be able to store eviction information for the regular page cache, prepare
every site that deals directly with the radix trees to handle entries other
than pages.

The common lookup functions will filter out non-page entries and return
NULL for page cache holes, just as before.  But provide a raw version of
the API which returns non-page entries as well, and switch shmem over to
use it.

Signed-off-by: Johannes Weiner <[email protected]>
Reviewed-by: Rik van Riel <[email protected]>
Reviewed-by: Minchan Kim <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Bob Liu <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Dave Chinner <[email protected]>
Cc: Greg Thelen <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: KOSAKI Motohiro <[email protected]>
Cc: Luigi Semenzato <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Metin Doslu <[email protected]>
Cc: Michel Lespinasse <[email protected]>
Cc: Ozgun Erdogan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Roman Gushchin <[email protected]>
Cc: Ryan Mallon <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
hnaz authored and torvalds committed Apr 3, 2014
1 parent e7b563b commit 0cd6144
Show file tree
Hide file tree
Showing 11 changed files with 349 additions and 130 deletions.
2 changes: 1 addition & 1 deletion fs/btrfs/compression.c
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
rcu_read_lock();
page = radix_tree_lookup(&mapping->page_tree, pg_index);
rcu_read_unlock();
if (page) {
if (page && !radix_tree_exceptional_entry(page)) {
misses++;
if (misses > 4)
break;
Expand Down
8 changes: 8 additions & 0 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,14 @@ extern void show_free_areas(unsigned int flags);
extern bool skip_free_areas_node(unsigned int flags, int nid);

int shmem_zero_setup(struct vm_area_struct *);
#ifdef CONFIG_SHMEM
bool shmem_mapping(struct address_space *mapping);
#else
/* Without CONFIG_SHMEM no address_space can be shmem/tmpfs-backed. */
static inline bool shmem_mapping(struct address_space *mapping)
{
	return false;
}
#endif

extern int can_do_mlock(void);
extern int user_shm_lock(size_t, struct user_struct *);
Expand Down
15 changes: 9 additions & 6 deletions include/linux/pagemap.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,12 +248,15 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
pgoff_t page_cache_prev_hole(struct address_space *mapping,
pgoff_t index, unsigned long max_scan);

extern struct page * find_get_page(struct address_space *mapping,
pgoff_t index);
extern struct page * find_lock_page(struct address_space *mapping,
pgoff_t index);
extern struct page * find_or_create_page(struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
struct page *find_get_page(struct address_space *mapping, pgoff_t offset);
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
struct page *find_lock_page(struct address_space *mapping, pgoff_t offset);
struct page *find_or_create_page(struct address_space *mapping, pgoff_t index,
gfp_t gfp_mask);
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
unsigned int nr_entries, struct page **entries,
pgoff_t *indices);
unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
unsigned int nr_pages, struct page **pages);
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
Expand Down
5 changes: 5 additions & 0 deletions include/linux/pagevec.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ struct pagevec {

void __pagevec_release(struct pagevec *pvec);
void __pagevec_lru_add(struct pagevec *pvec);
unsigned pagevec_lookup_entries(struct pagevec *pvec,
struct address_space *mapping,
pgoff_t start, unsigned nr_entries,
pgoff_t *indices);
void pagevec_remove_exceptionals(struct pagevec *pvec);
unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
pgoff_t start, unsigned nr_pages);
unsigned pagevec_lookup_tag(struct pagevec *pvec,
Expand Down
1 change: 1 addition & 0 deletions include/linux/shmem_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
unsigned long flags);
extern int shmem_zero_setup(struct vm_area_struct *);
extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
extern bool shmem_mapping(struct address_space *mapping);
extern void shmem_unlock_mapping(struct address_space *mapping);
extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);
Expand Down
202 changes: 178 additions & 24 deletions mm/filemap.c
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,29 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
}
EXPORT_SYMBOL_GPL(replace_page_cache_page);

static int page_cache_tree_insert(struct address_space *mapping,
struct page *page)
{
void **slot;
int error;

slot = radix_tree_lookup_slot(&mapping->page_tree, page->index);
if (slot) {
void *p;

p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
if (!radix_tree_exceptional_entry(p))
return -EEXIST;
radix_tree_replace_slot(slot, page);
mapping->nrpages++;
return 0;
}
error = radix_tree_insert(&mapping->page_tree, page->index, page);
if (!error)
mapping->nrpages++;
return error;
}

/**
* add_to_page_cache_locked - add a locked page to the pagecache
* @page: page to add
Expand Down Expand Up @@ -480,11 +503,10 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
page->index = offset;

spin_lock_irq(&mapping->tree_lock);
error = radix_tree_insert(&mapping->page_tree, offset, page);
error = page_cache_tree_insert(mapping, page);
radix_tree_preload_end();
if (unlikely(error))
goto err_insert;
mapping->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
spin_unlock_irq(&mapping->tree_lock);
trace_mm_filemap_add_to_page_cache(page);
Expand Down Expand Up @@ -712,7 +734,10 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
unsigned long i;

for (i = 0; i < max_scan; i++) {
if (!radix_tree_lookup(&mapping->page_tree, index))
struct page *page;

page = radix_tree_lookup(&mapping->page_tree, index);
if (!page || radix_tree_exceptional_entry(page))
break;
index++;
if (index == 0)
Expand Down Expand Up @@ -750,7 +775,10 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping,
unsigned long i;

for (i = 0; i < max_scan; i++) {
if (!radix_tree_lookup(&mapping->page_tree, index))
struct page *page;

page = radix_tree_lookup(&mapping->page_tree, index);
if (!page || radix_tree_exceptional_entry(page))
break;
index--;
if (index == ULONG_MAX)
Expand All @@ -762,14 +790,19 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping,
EXPORT_SYMBOL(page_cache_prev_hole);

/**
* find_get_page - find and get a page reference
* find_get_entry - find and get a page cache entry
* @mapping: the address_space to search
* @offset: the page index
* @offset: the page cache index
*
* Looks up the page cache slot at @mapping & @offset. If there is a
* page cache page, it is returned with an increased refcount.
*
* Is there a pagecache struct page at the given (mapping, offset) tuple?
* If yes, increment its refcount and return it; if no, return NULL.
* If the slot holds a shadow entry of a previously evicted page, it
* is returned.
*
* Otherwise, %NULL is returned.
*/
struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
{
void **pagep;
struct page *page;
Expand Down Expand Up @@ -810,24 +843,50 @@ struct page *find_get_page(struct address_space *mapping, pgoff_t offset)

return page;
}
EXPORT_SYMBOL(find_get_page);
EXPORT_SYMBOL(find_get_entry);

/**
* find_lock_page - locate, pin and lock a pagecache page
* find_get_page - find and get a page reference
* @mapping: the address_space to search
* @offset: the page index
*
* Locates the desired pagecache page, locks it, increments its reference
* count and returns its address.
* Looks up the page cache slot at @mapping & @offset. If there is a
* page cache page, it is returned with an increased refcount.
*
* Returns zero if the page was not present. find_lock_page() may sleep.
* Otherwise, %NULL is returned.
*/
struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
/**
 * find_get_page - find and get a page reference
 * @mapping: the address_space to search
 * @offset: the page index
 *
 * Looks up the page cache slot at @mapping & @offset.  A real page
 * cache page is returned with an increased refcount; holes and
 * exceptional (shadow) entries both yield %NULL.
 */
struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
{
	struct page *page = find_get_entry(mapping, offset);

	return radix_tree_exceptional_entry(page) ? NULL : page;
}
EXPORT_SYMBOL(find_get_page);

/**
* find_lock_entry - locate, pin and lock a page cache entry
* @mapping: the address_space to search
* @offset: the page cache index
*
* Looks up the page cache slot at @mapping & @offset. If there is a
* page cache page, it is returned locked and with an increased
* refcount.
*
* If the slot holds a shadow entry of a previously evicted page, it
* is returned.
*
* Otherwise, %NULL is returned.
*
* find_lock_entry() may sleep.
*/
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
{
struct page *page;

repeat:
page = find_get_page(mapping, offset);
page = find_get_entry(mapping, offset);
if (page && !radix_tree_exception(page)) {
lock_page(page);
/* Has the page been truncated? */
Expand All @@ -840,6 +899,29 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
}
return page;
}
EXPORT_SYMBOL(find_lock_entry);

/**
* find_lock_page - locate, pin and lock a pagecache page
* @mapping: the address_space to search
* @offset: the page index
*
* Looks up the page cache slot at @mapping & @offset. If there is a
* page cache page, it is returned locked and with an increased
* refcount.
*
* Otherwise, %NULL is returned.
*
* find_lock_page() may sleep.
*/
/**
 * find_lock_page - locate, pin and lock a pagecache page
 * @mapping: the address_space to search
 * @offset: the page index
 *
 * Looks up the page cache slot at @mapping & @offset.  A real page
 * cache page is returned locked and with an increased refcount; holes
 * and exceptional (shadow) entries both yield %NULL.
 *
 * find_lock_page() may sleep.
 */
struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
{
	struct page *page = find_lock_entry(mapping, offset);

	return radix_tree_exceptional_entry(page) ? NULL : page;
}
EXPORT_SYMBOL(find_lock_page);

/**
Expand All @@ -848,16 +930,18 @@ EXPORT_SYMBOL(find_lock_page);
* @index: the page's index into the mapping
* @gfp_mask: page allocation mode
*
* Locates a page in the pagecache. If the page is not present, a new page
* is allocated using @gfp_mask and is added to the pagecache and to the VM's
* LRU list. The returned page is locked and has its reference count
* incremented.
* Looks up the page cache slot at @mapping & @offset. If there is a
* page cache page, it is returned locked and with an increased
* refcount.
*
* If the page is not present, a new page is allocated using @gfp_mask
* and added to the page cache and the VM's LRU list. The page is
* returned locked and with an increased refcount.
*
* find_or_create_page() may sleep, even if @gfp_flags specifies an atomic
* allocation!
* On memory exhaustion, %NULL is returned.
*
* find_or_create_page() returns the desired page's address, or zero on
* memory exhaustion.
* find_or_create_page() may sleep, even if @gfp_flags specifies an
* atomic allocation!
*/
struct page *find_or_create_page(struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask)
Expand Down Expand Up @@ -889,6 +973,76 @@ struct page *find_or_create_page(struct address_space *mapping,
}
EXPORT_SYMBOL(find_or_create_page);

/**
 * find_get_entries - gang pagecache lookup
 * @mapping: The address_space to search
 * @start: The starting page cache index
 * @nr_entries: The maximum number of entries
 * @entries: Where the resulting entries are placed
 * @indices: The cache indices corresponding to the entries in @entries
 *
 * find_get_entries() will search for and return a group of up to
 * @nr_entries entries in the mapping. The entries are placed at
 * @entries. find_get_entries() takes a reference against any actual
 * pages it returns.
 *
 * The search returns a group of mapping-contiguous page cache entries
 * with ascending indexes. There may be holes in the indices due to
 * not-present pages.
 *
 * Any shadow entries of evicted pages are included in the returned
 * array.
 *
 * find_get_entries() returns the number of pages and shadow entries
 * which were found.
 *
 * Context: lockless lookup under rcu_read_lock(); pages are pinned
 * with a speculative refcount and re-checked against the slot.
 */
unsigned find_get_entries(struct address_space *mapping,
			  pgoff_t start, unsigned int nr_entries,
			  struct page **entries, pgoff_t *indices)
{
	void **slot;
	unsigned int ret = 0;
	struct radix_tree_iter iter;

	if (!nr_entries)
		return 0;

	rcu_read_lock();
restart:
	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
		struct page *page;
repeat:
		page = radix_tree_deref_slot(slot);
		/* Empty slot: raced with a removal, skip it. */
		if (unlikely(!page))
			continue;
		if (radix_tree_exception(page)) {
			/*
			 * A deref retry means the iterator raced with
			 * a tree restructuring; rescan the tree.
			 * NOTE(review): this restarts from @start with
			 * @ret preserved — presumably acceptable here,
			 * but it can repeat work under heavy churn.
			 */
			if (radix_tree_deref_retry(page))
				goto restart;
			/*
			 * Otherwise, we must be storing a swap entry
			 * here as an exceptional entry: so return it
			 * without attempting to raise page count.
			 */
			goto export;
		}
		/* Page is mid-free if the refcount can't be raised. */
		if (!page_cache_get_speculative(page))
			goto repeat;

		/* Has the page moved? */
		if (unlikely(page != *slot)) {
			page_cache_release(page);
			goto repeat;
		}
export:
		indices[ret] = iter.index;
		entries[ret] = page;
		if (++ret == nr_entries)
			break;
	}
	rcu_read_unlock();
	return ret;
}

/**
* find_get_pages - gang pagecache lookup
* @mapping: The address_space to search
Expand Down
20 changes: 14 additions & 6 deletions mm/mincore.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,21 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
* any other file mapping (ie. marked !present and faulted in with
* tmpfs's .fault). So swapped out tmpfs mappings are tested here.
*/
page = find_get_page(mapping, pgoff);
#ifdef CONFIG_SWAP
/* shmem/tmpfs may return swap: account for swapcache page too. */
if (radix_tree_exceptional_entry(page)) {
swp_entry_t swap = radix_to_swp_entry(page);
page = find_get_page(swap_address_space(swap), swap.val);
}
if (shmem_mapping(mapping)) {
page = find_get_entry(mapping, pgoff);
/*
* shmem/tmpfs may return swap: account for swapcache
* page too.
*/
if (radix_tree_exceptional_entry(page)) {
swp_entry_t swp = radix_to_swp_entry(page);
page = find_get_page(swap_address_space(swp), swp.val);
}
} else
page = find_get_page(mapping, pgoff);
#else
page = find_get_page(mapping, pgoff);
#endif
if (page) {
present = PageUptodate(page);
Expand Down
2 changes: 1 addition & 1 deletion mm/readahead.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
rcu_read_lock();
page = radix_tree_lookup(&mapping->page_tree, page_offset);
rcu_read_unlock();
if (page)
if (page && !radix_tree_exceptional_entry(page))
continue;

page = page_cache_alloc_readahead(mapping);
Expand Down
Loading

0 comments on commit 0cd6144

Please sign in to comment.