Skip to content

Commit

Permalink
radix_tree: add support for multi-order entries
Browse files Browse the repository at this point in the history
With huge pages, it is convenient to have the radix tree be able to
return an entry that covers multiple indices.  Previous attempts to deal
with the problem have involved inserting N duplicate entries, which is a
waste of memory and leads to problems trying to handle aliased tags, or
probing the tree multiple times to find alternative entries which might
cover the requested index.

This approach inserts one canonical entry into the tree for a given
range of indices, and may also insert other entries in order to ensure
that lookups find the canonical entry.

This solution only tolerates inserting powers of two that are at least
as large as the fanout of the tree.  If we wish to expand the radix tree's
abilities to support large-ish pages that are smaller than the fanout at the
penultimate level of the tree, then we would need to add one more step
in lookup to ensure that any sibling nodes in the final level of the
tree are dereferenced and we return the canonical entry that they
reference.

Signed-off-by: Matthew Wilcox <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: "Kirill A. Shutemov" <[email protected]>
Cc: Ross Zwisler <[email protected]>
Cc: Hugh Dickins <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Matthew Wilcox authored and torvalds committed Mar 17, 2016
1 parent 0070e28 commit e614523
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 29 deletions.
11 changes: 9 additions & 2 deletions include/linux/radix-tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,15 @@ static inline void radix_tree_replace_slot(void **pslot, void *item)
}

int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
struct radix_tree_node **nodep, void ***slotp);
int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
unsigned order, struct radix_tree_node **nodep,
void ***slotp);
int __radix_tree_insert(struct radix_tree_root *, unsigned long index,
unsigned order, void *);
/*
 * radix_tree_insert - insert a single-index entry into a radix tree
 * @root: radix tree root
 * @index: index key
 * @entry: item to insert
 *
 * Convenience wrapper that forwards to __radix_tree_insert() with an
 * order of 0, so existing callers that insert one entry per index keep
 * their original three-argument interface.  Returns whatever
 * __radix_tree_insert() returns.
 */
static inline int radix_tree_insert(struct radix_tree_root *root,
unsigned long index, void *entry)
{
/* order 0: the entry is not a multi-order entry */
return __radix_tree_insert(root, index, 0, entry);
}
void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
struct radix_tree_node **nodep, void ***slotp);
void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
Expand Down
109 changes: 83 additions & 26 deletions lib/radix-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,8 @@ static inline unsigned long radix_tree_maxindex(unsigned int height)
/*
* Extend a radix tree so it can store key @index.
*/
static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
static int radix_tree_extend(struct radix_tree_root *root,
unsigned long index, unsigned order)
{
struct radix_tree_node *node;
struct radix_tree_node *slot;
Expand All @@ -345,7 +346,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
while (index > radix_tree_maxindex(height))
height++;

if (root->rnode == NULL) {
if ((root->rnode == NULL) && (order == 0)) {
root->height = height;
goto out;
}
Expand Down Expand Up @@ -386,6 +387,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
* __radix_tree_create - create a slot in a radix tree
* @root: radix tree root
* @index: index key
* @order: index occupies 2^order aligned slots
* @nodep: returns node
* @slotp: returns slot
*
Expand All @@ -399,26 +401,29 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
* Returns -ENOMEM, or 0 for success.
*/
int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
struct radix_tree_node **nodep, void ***slotp)
unsigned order, struct radix_tree_node **nodep,
void ***slotp)
{
struct radix_tree_node *node = NULL, *slot;
unsigned int height, shift, offset;
int error;

BUG_ON((0 < order) && (order < RADIX_TREE_MAP_SHIFT));

/* Make sure the tree is high enough. */
if (index > radix_tree_maxindex(root->height)) {
error = radix_tree_extend(root, index);
error = radix_tree_extend(root, index, order);
if (error)
return error;
}

slot = indirect_to_ptr(root->rnode);
slot = root->rnode;

height = root->height;
shift = height * RADIX_TREE_MAP_SHIFT;

offset = 0; /* uninitialised var warning */
while (shift > 0) {
while (shift > order) {
if (slot == NULL) {
/* Have to add a child node. */
if (!(slot = radix_tree_node_alloc(root)))
Expand All @@ -433,15 +438,31 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
} else
rcu_assign_pointer(root->rnode,
ptr_to_indirect(slot));
}
} else if (!radix_tree_is_indirect_ptr(slot))
break;

/* Go a level down */
height--;
shift -= RADIX_TREE_MAP_SHIFT;
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
node = slot;
node = indirect_to_ptr(slot);
slot = node->slots[offset];
slot = indirect_to_ptr(slot);
height--;
}

/* Insert pointers to the canonical entry */
if ((shift - order) > 0) {
int i, n = 1 << (shift - order);
offset = offset & ~(n - 1);
slot = ptr_to_indirect(&node->slots[offset]);
for (i = 0; i < n; i++) {
if (node->slots[offset + i])
return -EEXIST;
}

for (i = 1; i < n; i++) {
rcu_assign_pointer(node->slots[offset + i], slot);
node->count++;
}
}

if (nodep)
Expand All @@ -452,23 +473,24 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
}

/**
* radix_tree_insert - insert into a radix tree
* __radix_tree_insert - insert into a radix tree
* @root: radix tree root
* @index: index key
* @order: key covers the 2^order indices around index
* @item: item to insert
*
* Insert an item into the radix tree at position @index.
*/
int radix_tree_insert(struct radix_tree_root *root,
unsigned long index, void *item)
int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
unsigned order, void *item)
{
struct radix_tree_node *node;
void **slot;
int error;

BUG_ON(radix_tree_is_indirect_ptr(item));

error = __radix_tree_create(root, index, &node, &slot);
error = __radix_tree_create(root, index, order, &node, &slot);
if (error)
return error;
if (*slot != NULL)
Expand All @@ -486,7 +508,7 @@ int radix_tree_insert(struct radix_tree_root *root,

return 0;
}
EXPORT_SYMBOL(radix_tree_insert);
EXPORT_SYMBOL(__radix_tree_insert);

/**
* __radix_tree_lookup - lookup an item in a radix tree
Expand Down Expand Up @@ -537,6 +559,8 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
node = rcu_dereference_raw(*slot);
if (node == NULL)
return NULL;
if (!radix_tree_is_indirect_ptr(node))
break;
node = indirect_to_ptr(node);

shift -= RADIX_TREE_MAP_SHIFT;
Expand Down Expand Up @@ -624,6 +648,8 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
tag_set(slot, tag, offset);
slot = slot->slots[offset];
BUG_ON(slot == NULL);
if (!radix_tree_is_indirect_ptr(slot))
break;
slot = indirect_to_ptr(slot);
shift -= RADIX_TREE_MAP_SHIFT;
height--;
Expand Down Expand Up @@ -669,6 +695,8 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
while (shift) {
if (slot == NULL)
goto out;
if (!radix_tree_is_indirect_ptr(slot))
break;
slot = indirect_to_ptr(slot);

shift -= RADIX_TREE_MAP_SHIFT;
Expand Down Expand Up @@ -753,6 +781,8 @@ int radix_tree_tag_get(struct radix_tree_root *root,
if (height == 1)
return 1;
node = rcu_dereference_raw(node->slots[offset]);
if (!radix_tree_is_indirect_ptr(node))
return 1;
shift -= RADIX_TREE_MAP_SHIFT;
height--;
}
Expand Down Expand Up @@ -813,6 +843,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,

node = rnode;
while (1) {
struct radix_tree_node *slot;
if ((flags & RADIX_TREE_ITER_TAGGED) ?
!test_bit(offset, node->tags[tag]) :
!node->slots[offset]) {
Expand Down Expand Up @@ -843,10 +874,12 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
if (!shift)
break;

node = rcu_dereference_raw(node->slots[offset]);
if (node == NULL)
slot = rcu_dereference_raw(node->slots[offset]);
if (slot == NULL)
goto restart;
node = indirect_to_ptr(node);
if (!radix_tree_is_indirect_ptr(slot))
break;
node = indirect_to_ptr(slot);
shift -= RADIX_TREE_MAP_SHIFT;
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
}
Expand Down Expand Up @@ -944,16 +977,20 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
if (!tag_get(slot, iftag, offset))
goto next;
if (shift) {
/* Go down one level */
shift -= RADIX_TREE_MAP_SHIFT;
node = slot;
slot = slot->slots[offset];
slot = indirect_to_ptr(slot);
continue;
if (radix_tree_is_indirect_ptr(slot)) {
slot = indirect_to_ptr(slot);
shift -= RADIX_TREE_MAP_SHIFT;
continue;
} else {
slot = node;
node = node->parent;
}
}

/* tag the leaf */
tagged++;
tagged += 1 << shift;
tag_set(slot, settag, offset);

/* walk back up the path tagging interior nodes */
Expand Down Expand Up @@ -1201,11 +1238,20 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item,
goto out;
}

shift -= RADIX_TREE_MAP_SHIFT;
slot = rcu_dereference_raw(slot->slots[i]);
if (slot == NULL)
goto out;
if (!radix_tree_is_indirect_ptr(slot)) {
if (slot == item) {
*found_index = index + i;
index = 0;
} else {
index += shift;
}
goto out;
}
slot = indirect_to_ptr(slot);
shift -= RADIX_TREE_MAP_SHIFT;
}

/* Bottom level: check items */
Expand Down Expand Up @@ -1285,7 +1331,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)

/*
* The candidate node has more than one child, or its child
* is not at the leftmost slot, we cannot shrink.
* is not at the leftmost slot, or it is a multiorder entry,
* we cannot shrink.
*/
if (to_free->count != 1)
break;
Expand All @@ -1301,6 +1348,9 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
* one (root->rnode) as far as dependent read barriers go.
*/
if (root->height > 1) {
if (!radix_tree_is_indirect_ptr(slot))
break;

slot = indirect_to_ptr(slot);
slot->parent = NULL;
slot = ptr_to_indirect(slot);
Expand Down Expand Up @@ -1399,7 +1449,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
unsigned long index, void *item)
{
struct radix_tree_node *node;
unsigned int offset;
unsigned int offset, i;
void **slot;
void *entry;
int tag;
Expand Down Expand Up @@ -1428,6 +1478,13 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
radix_tree_tag_clear(root, index, tag);
}

/* Delete any sibling slots pointing to this slot */
for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
if (node->slots[offset + i] != ptr_to_indirect(slot))
break;
node->slots[offset + i] = NULL;
node->count--;
}
node->slots[offset] = NULL;
node->count--;

Expand Down
2 changes: 1 addition & 1 deletion mm/filemap.c
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
void **slot;
int error;

error = __radix_tree_create(&mapping->page_tree, page->index,
error = __radix_tree_create(&mapping->page_tree, page->index, 0,
&node, &slot);
if (error)
return error;
Expand Down

0 comments on commit e614523

Please sign in to comment.