Skip to content

Commit

Permalink
memfd: Convert memfd_wait_for_pins to XArray
Browse files Browse the repository at this point in the history
Simplify the locking by taking the spinlock while we walk the tree on
the assumption that many acquires and releases of the lock will be worse
than holding the lock while we process an entire batch of pages.

Signed-off-by: Matthew Wilcox <[email protected]>
Reviewed-by: Mike Kravetz <[email protected]>
  • Loading branch information
Matthew Wilcox committed Oct 21, 2018
1 parent 7ae3424 commit 2313216
Showing 1 changed file with 25 additions and 36 deletions.
61 changes: 25 additions & 36 deletions mm/memfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <uapi/linux/memfd.h>

/*
 * We need a tag: a new tag would expand every xa_node by 8 bytes,
 * so reuse a tag which we firmly believe is never set or cleared on tmpfs
 * or hugetlbfs because they are memory only filesystems.
 */
Expand Down Expand Up @@ -72,63 +72,52 @@ static void memfd_tag_pins(struct address_space *mapping)
*/
static int memfd_wait_for_pins(struct address_space *mapping)
{
struct radix_tree_iter iter;
void __rcu **slot;
pgoff_t start;
XA_STATE(xas, &mapping->i_pages, 0);
struct page *page;
int error, scan;

memfd_tag_pins(mapping);

error = 0;
for (scan = 0; scan <= LAST_SCAN; scan++) {
if (!radix_tree_tagged(&mapping->i_pages, MEMFD_TAG_PINNED))
unsigned int tagged = 0;

if (!xas_marked(&xas, MEMFD_TAG_PINNED))
break;

if (!scan)
lru_add_drain_all();
else if (schedule_timeout_killable((HZ << scan) / 200))
scan = LAST_SCAN;

start = 0;
rcu_read_lock();
radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter,
start, MEMFD_TAG_PINNED) {

page = radix_tree_deref_slot(slot);
if (radix_tree_exception(page)) {
if (radix_tree_deref_retry(page)) {
slot = radix_tree_iter_retry(&iter);
continue;
}

page = NULL;
}

if (page &&
page_count(page) - page_mapcount(page) != 1) {
if (scan < LAST_SCAN)
goto continue_resched;

xas_set(&xas, 0);
xas_lock_irq(&xas);
xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
bool clear = true;
if (xa_is_value(page))
continue;
if (page_count(page) - page_mapcount(page) != 1) {
/*
* On the last scan, we clean up all those tags
* we inserted; but make a note that we still
* found pages pinned.
*/
error = -EBUSY;
if (scan == LAST_SCAN)
error = -EBUSY;
else
clear = false;
}
if (clear)
xas_clear_mark(&xas, MEMFD_TAG_PINNED);
if (++tagged % XA_CHECK_SCHED)
continue;

xa_lock_irq(&mapping->i_pages);
radix_tree_tag_clear(&mapping->i_pages,
iter.index, MEMFD_TAG_PINNED);
xa_unlock_irq(&mapping->i_pages);
continue_resched:
if (need_resched()) {
slot = radix_tree_iter_resume(slot, &iter);
cond_resched_rcu();
}
xas_pause(&xas);
xas_unlock_irq(&xas);
cond_resched();
xas_lock_irq(&xas);
}
rcu_read_unlock();
xas_unlock_irq(&xas);
}

return error;
Expand Down

0 comments on commit 2313216

Please sign in to comment.