Skip to content

Commit

Permalink
mm: replace vma prio_tree with an interval tree
Browse files Browse the repository at this point in the history
Implement an interval tree as a replacement for the VMA prio_tree.  The
algorithms are similar to lib/interval_tree.c; however that code can't be
directly reused as the interval endpoints are not explicitly stored in the
VMA.  So instead, the common algorithm is moved into a template and the
details (node type, how to get interval endpoints from the node, etc) are
filled in using the C preprocessor.

Once the interval tree functions are available, using them as a
replacement to the VMA prio tree is a relatively simple, mechanical job.

Signed-off-by: Michel Lespinasse <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Hillf Danton <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: David Woodhouse <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
walken-google authored and torvalds committed Oct 9, 2012
1 parent fff3fd8 commit 6b2dbba
Show file tree
Hide file tree
Showing 25 changed files with 357 additions and 466 deletions.
3 changes: 1 addition & 2 deletions arch/arm/mm/fault-armv.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *mpnt;
struct prio_tree_iter iter;
unsigned long offset;
pgoff_t pgoff;
int aliases = 0;
Expand All @@ -147,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
* cache coherency.
*/
flush_dcache_mmap_lock(mapping);
vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
/*
* If this VMA is not in our MM, we can ignore it.
* Note that we intentionally mask out the VMA
Expand Down
3 changes: 1 addition & 2 deletions arch/arm/mm/flush.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
{
struct mm_struct *mm = current->active_mm;
struct vm_area_struct *mpnt;
struct prio_tree_iter iter;
pgoff_t pgoff;

/*
Expand All @@ -208,7 +207,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);

flush_dcache_mmap_lock(mapping);
vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
unsigned long offset;

/*
Expand Down
3 changes: 1 addition & 2 deletions arch/parisc/kernel/cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,6 @@ void flush_dcache_page(struct page *page)
{
struct address_space *mapping = page_mapping(page);
struct vm_area_struct *mpnt;
struct prio_tree_iter iter;
unsigned long offset;
unsigned long addr, old_addr = 0;
pgoff_t pgoff;
Expand All @@ -299,7 +298,7 @@ void flush_dcache_page(struct page *page)
* to flush one address here for them all to become coherent */

flush_dcache_mmap_lock(mapping);
vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
addr = mpnt->vm_start + offset;

Expand Down
3 changes: 1 addition & 2 deletions arch/x86/mm/hugetlbpage.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
struct address_space *mapping = vma->vm_file->f_mapping;
pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
struct prio_tree_iter iter;
struct vm_area_struct *svma;
unsigned long saddr;
pte_t *spte = NULL;
Expand All @@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
return (pte_t *)pmd_alloc(mm, pud, addr);

mutex_lock(&mapping->i_mmap_mutex);
vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
if (svma == vma)
continue;

Expand Down
9 changes: 4 additions & 5 deletions fs/hugetlbfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -397,17 +397,16 @@ static void hugetlbfs_evict_inode(struct inode *inode)
}

static inline void
hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
hugetlb_vmtruncate_list(struct rb_root *root, pgoff_t pgoff)
{
struct vm_area_struct *vma;
struct prio_tree_iter iter;

vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) {
vma_interval_tree_foreach(vma, root, pgoff, ULONG_MAX) {
unsigned long v_offset;

/*
* Can the expression below overflow on 32-bit arches?
* No, because the prio_tree returns us only those vmas
* No, because the interval tree returns us only those vmas
* which overlap the truncated area starting at pgoff,
* and no vma on a 32-bit arch can span beyond the 4GB.
*/
Expand All @@ -432,7 +431,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)

i_size_write(inode, offset);
mutex_lock(&mapping->i_mmap_mutex);
if (!prio_tree_empty(&mapping->i_mmap))
if (!RB_EMPTY_ROOT(&mapping->i_mmap))
hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
mutex_unlock(&mapping->i_mmap_mutex);
truncate_hugepages(inode, offset);
Expand Down
2 changes: 1 addition & 1 deletion fs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ void address_space_init_once(struct address_space *mapping)
mutex_init(&mapping->i_mmap_mutex);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
mapping->i_mmap = RB_ROOT;
INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}
EXPORT_SYMBOL(address_space_init_once);
Expand Down
6 changes: 3 additions & 3 deletions include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ struct inodes_stat_t {
#include <linux/cache.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/prio_tree.h>
#include <linux/rbtree.h>
#include <linux/init.h>
#include <linux/pid.h>
#include <linux/bug.h>
Expand Down Expand Up @@ -669,7 +669,7 @@ struct address_space {
struct radix_tree_root page_tree; /* radix tree of all pages */
spinlock_t tree_lock; /* and lock protecting it */
unsigned int i_mmap_writable;/* count VM_SHARED mappings */
struct prio_tree_root i_mmap; /* tree of private and shared mappings */
struct rb_root i_mmap; /* tree of private and shared mappings */
struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
struct mutex i_mmap_mutex; /* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */
Expand Down Expand Up @@ -741,7 +741,7 @@ int mapping_tagged(struct address_space *mapping, int tag);
*/
static inline int mapping_mapped(struct address_space *mapping)
{
return !prio_tree_empty(&mapping->i_mmap) ||
return !RB_EMPTY_ROOT(&mapping->i_mmap) ||
!list_empty(&mapping->i_mmap_nonlinear);
}

Expand Down
215 changes: 215 additions & 0 deletions include/linux/interval_tree_tmpl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
/*
Interval Trees
(C) 2012 Michel Lespinasse <[email protected]>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
include/linux/interval_tree_tmpl.h
*/

/*
* Template for implementing interval trees
*
* ITSTRUCT: struct type of the interval tree nodes
* ITRB: name of struct rb_node field within ITSTRUCT
* ITTYPE: type of the interval endpoints
* ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree
* ITSTART(n): start endpoint of ITSTRUCT node n
* ITLAST(n): last endpoint of ITSTRUCT node n
* ITSTATIC: 'static' or empty
* ITPREFIX: prefix to use for the inline tree definitions
*/

/*
 * IT(name) -> ITPREFIX_name
 *
 * The pasting goes through two macro levels on purpose: ITNAME() exists only
 * so that its arguments (ITPREFIX in particular) are macro-expanded before
 * _ITNAME() applies the ## operator. Pasting directly inside IT() would glue
 * the literal token "ITPREFIX" rather than the prefix the includer defined.
 */
#define _ITNAME(prefix, name) prefix ## _ ## name
#define ITNAME(prefix, name) _ITNAME(prefix, name)
#define IT(name) ITNAME(ITPREFIX, name)

/* Callbacks for augmented rbtree insert and remove */

/*
 * Compute the value that @node's ITSUBTREE field should hold: the largest of
 * the node's own last endpoint and the ITSUBTREE values cached in its left
 * and right children (when present). The children's cached values are
 * trusted as-is; nothing below them is revisited.
 */
static inline ITTYPE IT(compute_subtree_last)(ITSTRUCT *node)
{
	ITTYPE best = ITLAST(node);
	struct rb_node *child;

	child = node->ITRB.rb_left;
	if (child) {
		ITTYPE child_last = rb_entry(child, ITSTRUCT, ITRB)->ITSUBTREE;

		if (best < child_last)
			best = child_last;
	}

	child = node->ITRB.rb_right;
	if (child) {
		ITTYPE child_last = rb_entry(child, ITSTRUCT, ITRB)->ITSUBTREE;

		if (best < child_last)
			best = child_last;
	}

	return best;
}

/*
 * Refresh the cached ITSUBTREE values on the path from @rb upwards.
 *
 * Walks parent links starting at @rb and stops when @stop is reached (@stop
 * itself is not updated) or as soon as a node's cached value already equals
 * the recomputed one, since nothing above it can change in that case.
 */
static void IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop)
{
	ITSTRUCT *cur;
	ITTYPE recomputed;

	for (; rb != stop; rb = rb_parent(&cur->ITRB)) {
		cur = rb_entry(rb, ITSTRUCT, ITRB);
		recomputed = IT(compute_subtree_last)(cur);
		if (cur->ITSUBTREE == recomputed)
			return;
		cur->ITSUBTREE = recomputed;
	}
}

/*
 * Copy the cached ITSUBTREE value from the node embedding @rb_old to the
 * node embedding @rb_new.
 * NOTE(review): presumably invoked by the rbtree code when @rb_new takes
 * over @rb_old's position in the tree - confirm against the rbtree header.
 */
static void IT(augment_copy)(struct rb_node *rb_old, struct rb_node *rb_new)
{
	rb_entry(rb_new, ITSTRUCT, ITRB)->ITSUBTREE =
		rb_entry(rb_old, ITSTRUCT, ITRB)->ITSUBTREE;
}

static void IT(augment_rotate)(struct rb_node *rb_old, struct rb_node *rb_new)
{
ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB);
ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB);

new->ITSUBTREE = old->ITSUBTREE;
old->ITSUBTREE = IT(compute_subtree_last)(old);
}

/*
 * Callback table passed to rb_insert_augmented() and rb_erase_augmented() by
 * IT(insert) and IT(remove). The initializer is positional: the propagate,
 * copy and rotate handlers are listed in that order.
 * NOTE(review): this relies on struct rb_augment_callbacks (declared outside
 * this file) listing its members in the same order - confirm against the
 * rbtree header.
 */
static const struct rb_augment_callbacks IT(augment_callbacks) = {
IT(augment_propagate), IT(augment_copy), IT(augment_rotate)
};

/* Insert / remove interval nodes from the tree */

/*
 * Insert @node into the interval tree rooted at @root.
 *
 * Nodes are ordered by start endpoint; a node whose start is not smaller
 * than the one it is compared against goes to the right. While descending,
 * every node on the path has its cached ITSUBTREE raised to @node's last
 * endpoint when that is larger, so the cached values are already correct
 * for the new leaf before the rbtree code rebalances.
 */
ITSTATIC void IT(insert)(ITSTRUCT *node, struct rb_root *root)
{
	ITTYPE node_start = ITSTART(node);
	ITTYPE node_last = ITLAST(node);
	struct rb_node **slot = &root->rb_node;
	struct rb_node *above = NULL;

	while (*slot) {
		ITSTRUCT *cur = rb_entry(*slot, ITSTRUCT, ITRB);

		above = *slot;
		if (cur->ITSUBTREE < node_last)
			cur->ITSUBTREE = node_last;
		slot = (node_start < ITSTART(cur)) ? &cur->ITRB.rb_left
						   : &cur->ITRB.rb_right;
	}

	/* A fresh leaf has no children: its subtree maximum is its own last */
	node->ITSUBTREE = node_last;
	rb_link_node(&node->ITRB, above, slot);
	rb_insert_augmented(&node->ITRB, root, &IT(augment_callbacks));
}

/*
 * Remove @node from the interval tree rooted at @root. The erase is delegated
 * to rb_erase_augmented(), which is given this template's augment callbacks.
 */
ITSTATIC void IT(remove)(ITSTRUCT *node, struct rb_root *root)
{
	struct rb_node *victim = &node->ITRB;

	rb_erase_augmented(victim, root, &IT(augment_callbacks));
}

/*
* Iterate over intervals intersecting [start;last]
*
* Note that a node's interval intersects [start;last] iff:
* Cond1: ITSTART(node) <= last
* and
* Cond2: start <= ITLAST(node)
*/

/*
 * Find the leftmost node in the subtree rooted at @node whose interval
 * intersects [start;last].
 *
 * The caller must guarantee start <= node->ITSUBTREE on entry (this is the
 * loop invariant below; IT(iter_first) and IT(iter_next) both check it
 * before calling).
 *
 * Returns the matching node, or NULL when no node in the subtree intersects
 * [start;last].
 */
static ITSTRUCT *IT(subtree_search)(ITSTRUCT *node, ITTYPE start, ITTYPE last)
{
while (true) {
/*
* Loop invariant: start <= node->ITSUBTREE
* (Cond2 is satisfied by one of the subtree nodes)
*/
if (node->ITRB.rb_left) {
ITSTRUCT *left = rb_entry(node->ITRB.rb_left,
ITSTRUCT, ITRB);
if (start <= left->ITSUBTREE) {
/*
* Some nodes in left subtree satisfy Cond2.
* Iterate to find the leftmost such node N.
* If it also satisfies Cond1, that's the match
* we are looking for. Otherwise, there is no
* matching interval as nodes to the right of N
* can't satisfy Cond1 either.
*/
node = left;
continue;
}
}
/* No candidate in the left subtree: test this node, then go right */
if (ITSTART(node) <= last) { /* Cond1 */
if (start <= ITLAST(node)) /* Cond2 */
return node; /* node is leftmost match */
if (node->ITRB.rb_right) {
node = rb_entry(node->ITRB.rb_right,
ITSTRUCT, ITRB);
/* Descend only if the invariant still holds for the right child */
if (start <= node->ITSUBTREE)
continue;
}
}
/*
 * Reached when Cond1 fails for this node (nodes further right are
 * inserted with starts that are not smaller, so they fail it too), or
 * when there is no right subtree that can still satisfy Cond2.
 */
return NULL; /* No match */
}
}

/*
 * Return the leftmost node in the tree rooted at @root whose interval
 * intersects [start;last], or NULL when the tree is empty or no node
 * intersects that range.
 */
ITSTATIC ITSTRUCT *IT(iter_first)(struct rb_root *root,
				  ITTYPE start, ITTYPE last)
{
	struct rb_node *top = root->rb_node;
	ITSTRUCT *top_node;

	if (!top)
		return NULL;

	top_node = rb_entry(top, ITSTRUCT, ITRB);

	/* No interval in the whole tree reaches as far as start */
	if (top_node->ITSUBTREE < start)
		return NULL;

	return IT(subtree_search)(top_node, start, last);
}

/*
 * Return the next node after @node, in tree order, whose interval intersects
 * [start;last], or NULL when there is none.
 *
 * The right subtree of the current node is searched first; if it cannot
 * contain a match, the walk climbs to the first ancestor reached from its
 * left child and tests that ancestor, repeating as needed.
 *
 * NOTE(review): the loop invariant assumes ITSTART(node) <= last on entry,
 * i.e. that @node was returned by IT(iter_first) or IT(iter_next) for the
 * same [start;last] - confirm callers always honour this.
 */
ITSTATIC ITSTRUCT *IT(iter_next)(ITSTRUCT *node, ITTYPE start, ITTYPE last)
{
struct rb_node *rb = node->ITRB.rb_right, *prev;

while (true) {
/*
* Loop invariants:
* Cond1: ITSTART(node) <= last
* rb == node->ITRB.rb_right
*
* First, search right subtree if suitable
*/
if (rb) {
ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB);
if (start <= right->ITSUBTREE)
return IT(subtree_search)(right, start, last);
}

/* Move up the tree until we come from a node's left child */
do {
rb = rb_parent(&node->ITRB);
if (!rb)
return NULL;
prev = &node->ITRB;
node = rb_entry(rb, ITSTRUCT, ITRB);
/* rb is reloaded with the new node's right child to restore the invariant */
rb = node->ITRB.rb_right;
} while (prev == rb);

/* Check if the node intersects [start;last] */
if (last < ITSTART(node)) /* !Cond1 */
return NULL;
else if (start <= ITLAST(node)) /* Cond2 */
return node;
}
}
30 changes: 17 additions & 13 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include <linux/list.h>
#include <linux/mmzone.h>
#include <linux/rbtree.h>
#include <linux/prio_tree.h>
#include <linux/atomic.h>
#include <linux/debug_locks.h>
#include <linux/mm_types.h>
Expand Down Expand Up @@ -1355,22 +1354,27 @@ extern void zone_pcp_reset(struct zone *zone);
extern atomic_long_t mmap_pages_allocated;
extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);

/* prio_tree.c */
void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *);
struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
struct prio_tree_iter *iter);

#define vma_prio_tree_foreach(vma, iter, root, begin, end) \
for (prio_tree_iter_init(iter, root, begin, end), vma = NULL; \
(vma = vma_prio_tree_next(vma, iter)); )
/* interval_tree.c */
void vma_interval_tree_add(struct vm_area_struct *vma,
struct vm_area_struct *old,
struct address_space *mapping);
void vma_interval_tree_insert(struct vm_area_struct *node,
struct rb_root *root);
void vma_interval_tree_remove(struct vm_area_struct *node,
struct rb_root *root);
struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root,
unsigned long start, unsigned long last);
struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
unsigned long start, unsigned long last);

/*
 * Iterate @vma over the entries of @root returned by the interval tree
 * iterators for the range [start;last]. @vma is the loop cursor and is NULL
 * once the loop has run to completion.
 *
 * @start and @last are expanded into both the iter_first and the iter_next
 * call, so they are re-evaluated on every iteration: pass expressions
 * without side effects.
 */
#define vma_interval_tree_foreach(vma, root, start, last) \
for (vma = vma_interval_tree_iter_first(root, start, last); \
vma; vma = vma_interval_tree_iter_next(vma, start, last))

static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
struct list_head *list)
{
vma->shared.vm_set.parent = NULL;
list_add_tail(&vma->shared.vm_set.list, list);
list_add_tail(&vma->shared.nonlinear, list);
}

/* mmap.c */
Expand Down
Loading

0 comments on commit 6b2dbba

Please sign in to comment.