Skip to content

Commit

Permalink
Btrfs: fix extent pinning bugs in the tree log
Browse files Browse the repository at this point in the history
The tree log had two important bugs that could cause corruptions after a
crash.  Sometimes we were allowing tree log blocks to be reused after
the tree log was committed but before the transaction commit was done.

This allowed a future metadata write to overwrite the tree log data.  It
is fixed by adding a new variant of freeing reserved extents that always
pins them.  Credit goes to Stefan Behrens and Arne Jansen for many many
hours spent tracking this bug down.

During tree log replay, we do a pass through the tree log and pin all
the extents we find.  This makes sure the replay code won't go in and
use any of those blocks for new allocations during replay.  The problem
is the free space cache isn't honoring these pinned extents.  So the
allocator can end up handing them out, leading to all kinds of problems
during replay.

The fix here is to force any free space cache to load while we pin the
extents, and then to make sure we remove the pinned extents from the
free space rbtree.

Signed-off-by: Chris Mason <[email protected]>
Reported-by: Stefan Behrens <[email protected]>
  • Loading branch information
chrismason-xx committed Nov 6, 2011
1 parent 1eae31e commit e688b72
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 8 deletions.
5 changes: 5 additions & 0 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2156,6 +2156,9 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
u64 num_bytes, u64 *refs, u64 *flags);
int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num, int reserved);
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes);
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 offset, u64 bytenr);
Expand Down Expand Up @@ -2206,6 +2209,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 root_objectid, u64 owner, u64 offset);

int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
u64 start, u64 len);
int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
Expand Down
51 changes: 48 additions & 3 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -4344,6 +4344,34 @@ int btrfs_pin_extent(struct btrfs_root *root,
return 0;
}

/*
* this function must be called within transaction
*/
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes)
{
struct btrfs_block_group_cache *cache;

cache = btrfs_lookup_block_group(root->fs_info, bytenr);
BUG_ON(!cache);

/*
* pull in the free space cache (if any) so that our pin
* removes the free space from the cache. We have load_only set
* to one because the slow code to read in the free extents does check
* the pinned extents.
*/
cache_block_group(cache, trans, root, 1);

pin_down_extent(root, cache, bytenr, num_bytes, 0);

/* remove us from the free space cache (if we're there at all) */
btrfs_remove_free_space(cache, bytenr, num_bytes);
btrfs_put_block_group(cache);
return 0;
}

/**
* btrfs_update_reserved_bytes - update the block_group and space info counters
* @cache: The cache we are manipulating
Expand Down Expand Up @@ -5487,7 +5515,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
return ret;
}

int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
static int __btrfs_free_reserved_extent(struct btrfs_root *root,
u64 start, u64 len, int pin)
{
struct btrfs_block_group_cache *cache;
int ret = 0;
Expand All @@ -5502,15 +5531,31 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
if (btrfs_test_opt(root, DISCARD))
ret = btrfs_discard_extent(root, start, len, NULL);

btrfs_add_free_space(cache, start, len);
btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
if (pin)
pin_down_extent(root, cache, start, len, 1);
else {
btrfs_add_free_space(cache, start, len);
btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
}
btrfs_put_block_group(cache);

trace_btrfs_reserved_extent_free(root, start, len);

return ret;
}

int btrfs_free_reserved_extent(struct btrfs_root *root,
u64 start, u64 len)
{
return __btrfs_free_reserved_extent(root, start, len, 0);
}

int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
u64 start, u64 len)
{
return __btrfs_free_reserved_extent(root, start, len, 1);
}

static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
Expand Down
11 changes: 6 additions & 5 deletions fs/btrfs/tree-log.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,9 @@ static int process_one_buffer(struct btrfs_root *log,
struct walk_control *wc, u64 gen)
{
if (wc->pin)
btrfs_pin_extent(log->fs_info->extent_root,
eb->start, eb->len, 0);
btrfs_pin_extent_for_log_replay(wc->trans,
log->fs_info->extent_root,
eb->start, eb->len);

if (btrfs_buffer_uptodate(eb, gen)) {
if (wc->write)
Expand Down Expand Up @@ -1760,7 +1761,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,

WARN_ON(root_owner !=
BTRFS_TREE_LOG_OBJECTID);
ret = btrfs_free_reserved_extent(root,
ret = btrfs_free_and_pin_reserved_extent(root,
bytenr, blocksize);
BUG_ON(ret);
}
Expand Down Expand Up @@ -1828,7 +1829,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(next);

WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
ret = btrfs_free_reserved_extent(root,
ret = btrfs_free_and_pin_reserved_extent(root,
path->nodes[*level]->start,
path->nodes[*level]->len);
BUG_ON(ret);
Expand Down Expand Up @@ -1897,7 +1898,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,

WARN_ON(log->root_key.objectid !=
BTRFS_TREE_LOG_OBJECTID);
ret = btrfs_free_reserved_extent(log, next->start,
ret = btrfs_free_and_pin_reserved_extent(log, next->start,
next->len);
BUG_ON(ret);
}
Expand Down

0 comments on commit e688b72

Please sign in to comment.