dax: Create a range version of dax_layout_busy_page()
The virtiofs device has a range of memory which is mapped into file inodes
using DAX. This memory is mapped by qemu on the host and maps different
sections of the real file on the host. The size of this memory is limited
(determined by the administrator) and, depending on the filesystem size, we
will soon reach a situation where all of the memory is in use and we need
to reclaim some of it.

As part of the reclaim process, we will need to make sure that there are
no active references (taken by get_user_pages()) to pages in the memory
range we are trying to reclaim. I am planning to use
dax_layout_busy_page() for this, but in its current form it is per inode
and scans through all the pages of the inode.

We want to reclaim only a portion of the memory (say a 2MB page), so we
want to make sure that only that 2MB range of pages has no references
(and we don't want to unmap all the pages of the inode).

Hence, create a range version of this function, named
dax_layout_busy_page_range(), which can be passed the range that needs
to be unmapped.
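
For illustration only (not part of this patch), here is a minimal sketch of
how a reclaim path might use the new helper to make sure a single 2MB window
has no pinned pages before reusing it. The helper name
break_dax_layouts_in_range() is made up, and the wait is modelled loosely on
the xfs_break_dax_layouts() pattern; a real caller would drop and retake its
own locks around the wait and then retry the check.

#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/wait_bit.h>

/* Sketch only: wait until no page in [start, end] of the inode's
 * mapping is pinned (e.g. by get_user_pages()). */
static int break_dax_layouts_in_range(struct inode *inode,
				      loff_t start, loff_t end)
{
	struct page *page;

	/* First page in the range with refcount > 1, or NULL if none. */
	page = dax_layout_busy_page_range(inode->i_mapping, start, end);
	if (!page)
		return 0;

	/* Sleep until the extra reference is dropped; the caller
	 * should re-run the check afterwards. */
	return ___wait_var_event(&page->_refcount,
				 atomic_read(&page->_refcount) == 1,
				 TASK_INTERRUPTIBLE, 0, 0, schedule());
}

For the 2MB case described above, 'start' would be the window offset and
'end' would be start + SZ_2M - 1.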

Cc: Dan Williams <[email protected]>
Cc: [email protected]
Cc: Jan Kara <[email protected]>
Cc: Vishal L Verma <[email protected]>
Cc: "Weiny, Ira" <[email protected]>
Signed-off-by: Vivek Goyal <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Signed-off-by: Miklos Szeredi <[email protected]>
rhvgoyal authored and Miklos Szeredi committed Sep 10, 2020
1 parent 1a9d5d4 commit 6bbdd56
Showing 2 changed files with 29 additions and 6 deletions.
fs/dax.c (23 additions, 6 deletions)

@@ -559,8 +559,11 @@ static void *grab_mapping_entry(struct xa_state *xas,
 }
 
 /**
- * dax_layout_busy_page - find first pinned page in @mapping
+ * dax_layout_busy_page_range - find first pinned page in @mapping
  * @mapping: address space to scan for a page with ref count > 1
+ * @start: Starting offset. Page containing 'start' is included.
+ * @end: End offset. Page containing 'end' is included. If 'end' is LLONG_MAX,
+ *       pages from 'start' till the end of file are included.
  *
  * DAX requires ZONE_DEVICE mapped pages. These pages are never
  * 'onlined' to the page allocator so they are considered idle when
@@ -573,12 +576,15 @@ static void *grab_mapping_entry(struct xa_state *xas,
  * to be able to run unmap_mapping_range() and subsequently not race
  * mapping_mapped() becoming true.
  */
-struct page *dax_layout_busy_page(struct address_space *mapping)
+struct page *dax_layout_busy_page_range(struct address_space *mapping,
+					loff_t start, loff_t end)
 {
-	XA_STATE(xas, &mapping->i_pages, 0);
 	void *entry;
 	unsigned int scanned = 0;
 	struct page *page = NULL;
+	pgoff_t start_idx = start >> PAGE_SHIFT;
+	pgoff_t end_idx;
+	XA_STATE(xas, &mapping->i_pages, start_idx);
 
 	/*
 	 * In the 'limited' case get_user_pages() for dax is disabled.
@@ -589,22 +595,27 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 	if (!dax_mapping(mapping) || !mapping_mapped(mapping))
 		return NULL;
 
+	/* If end == LLONG_MAX, all pages from start to till end of file */
+	if (end == LLONG_MAX)
+		end_idx = ULONG_MAX;
+	else
+		end_idx = end >> PAGE_SHIFT;
 	/*
 	 * If we race get_user_pages_fast() here either we'll see the
 	 * elevated page count in the iteration and wait, or
 	 * get_user_pages_fast() will see that the page it took a reference
 	 * against is no longer mapped in the page tables and bail to the
 	 * get_user_pages() slow path. The slow path is protected by
 	 * pte_lock() and pmd_lock(). New references are not taken without
-	 * holding those locks, and unmap_mapping_range() will not zero the
+	 * holding those locks, and unmap_mapping_pages() will not zero the
 	 * pte or pmd without holding the respective lock, so we are
 	 * guaranteed to either see new references or prevent new
 	 * references from being established.
	 */
-	unmap_mapping_range(mapping, 0, 0, 0);
+	unmap_mapping_pages(mapping, start_idx, end_idx - start_idx + 1, 0);
 
 	xas_lock_irq(&xas);
-	xas_for_each(&xas, entry, ULONG_MAX) {
+	xas_for_each(&xas, entry, end_idx) {
 		if (WARN_ON_ONCE(!xa_is_value(entry)))
 			continue;
 		if (unlikely(dax_is_locked(entry)))
@@ -625,6 +636,12 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 	xas_unlock_irq(&xas);
 	return page;
 }
+EXPORT_SYMBOL_GPL(dax_layout_busy_page_range);
+
+struct page *dax_layout_busy_page(struct address_space *mapping)
+{
+	return dax_layout_busy_page_range(mapping, 0, LLONG_MAX);
+}
 EXPORT_SYMBOL_GPL(dax_layout_busy_page);
 
 static int __dax_invalidate_entry(struct address_space *mapping,
include/linux/dax.h (6 additions, 0 deletions)

@@ -141,6 +141,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 		struct dax_device *dax_dev, struct writeback_control *wbc);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
+struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
 dax_entry_t dax_lock_page(struct page *page);
 void dax_unlock_page(struct page *page, dax_entry_t cookie);
 #else
@@ -171,6 +172,11 @@ static inline struct page *dax_layout_busy_page(struct address_space *mapping)
 	return NULL;
 }
 
+static inline struct page *dax_layout_busy_page_range(struct address_space *mapping, pgoff_t start, pgoff_t nr_pages)
+{
+	return NULL;
+}
+
 static inline int dax_writeback_mapping_range(struct address_space *mapping,
 		struct dax_device *dax_dev, struct writeback_control *wbc)
 {
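
As a sanity check on the index arithmetic introduced above, here is a small
worked example (illustrative only, not from the patch), assuming 4K pages
(PAGE_SHIFT == 12) and a made-up 2MB window starting 8MB into the file:

	loff_t start = 4 * SZ_2M;		/* window starts at offset 8MB */
	loff_t end   = start + SZ_2M - 1;	/* inclusive end offset        */

	pgoff_t start_idx = start >> PAGE_SHIFT;	/* 0x800000 >> 12 = 2048 */
	pgoff_t end_idx   = end >> PAGE_SHIFT;		/* 0x9fffff >> 12 = 2559 */

	/* unmap_mapping_pages(mapping, start_idx, end_idx - start_idx + 1, 0)
	 * therefore unmaps 2559 - 2048 + 1 = 512 pages, i.e. exactly 2MB. */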
