Skip to content

Commit

Permalink
Btrfs: rework qgroup accounting
Browse files Browse the repository at this point in the history
Currently qgroups account for space by intercepting delayed ref updates to fs
trees.  It does this by adding sequence numbers to delayed ref updates so that
it can figure out how the tree looked before the update so we can adjust the
counters properly.  The problem with this is that it does not allow delayed refs
to be merged, so if you say are defragging an extent with 5k snapshots pointing
to it we will thrash the delayed ref lock because we need to go back and
manually merge these things together.  Instead we want to process quota changes
when we know they are going to happen, like when we first allocate an extent, we
free a reference for an extent, we add new references etc.  This patch
accomplishes this by only adding qgroup operations for real ref changes.  We
only modify the sequence number when we need to lookup roots for bytenrs, this
reduces the amount of churn on the sequence number and allows us to merge
delayed refs as we add them most of the time.  This patch encompasses a bunch of
architectural changes

1) qgroup ref operations: instead of tracking qgroup operations through the
delayed refs we simply add new ref operations whenever we notice that we need to
when we've modified the refs themselves.

2) tree mod seq:  we no longer have this separation of major/minor counters.
this makes the sequence number stuff much more sane and we can remove some
locking that was needed to protect the counter.

3) delayed ref seq: we now read the tree mod seq number and use that as our
sequence.  This means each new delayed ref doesn't have it's own unique sequence
number, rather whenever we go to lookup backrefs we inc the sequence number so
we can make sure to keep any new operations from screwing up our world view at
that given point.  This allows us to merge delayed refs during runtime.

With all of these changes the delayed ref stuff is a little saner and the qgroup
accounting stuff no longer goes negative in some cases like it was before.
Thanks,

Signed-off-by: Josef Bacik <[email protected]>
Signed-off-by: Chris Mason <[email protected]>
  • Loading branch information
Josef Bacik authored and masoncl committed Jun 10, 2014
1 parent 5dca6ee commit fcebe45
Show file tree
Hide file tree
Showing 12 changed files with 1,044 additions and 479 deletions.
4 changes: 2 additions & 2 deletions fs/btrfs/backref.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);

int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots);
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
struct extent_buffer *eb_in, u64 parent,
Expand Down
45 changes: 5 additions & 40 deletions fs/btrfs/ctree.c
Original file line number Diff line number Diff line change
Expand Up @@ -356,43 +356,13 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
}

/*
* Increment the upper half of tree_mod_seq, set lower half zero.
*
* Must be called with fs_info->tree_mod_seq_lock held.
*/
static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
{
u64 seq = atomic64_read(&fs_info->tree_mod_seq);
seq &= 0xffffffff00000000ull;
seq += 1ull << 32;
atomic64_set(&fs_info->tree_mod_seq, seq);
return seq;
}

/*
* Increment the lower half of tree_mod_seq.
*
* Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
* are generated should not technically require a spin lock here. (Rationale:
* incrementing the minor while incrementing the major seq number is between its
* atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
* just returns a unique sequence number as usual.) We have decided to leave
* that requirement in here and rethink it once we notice it really imposes a
* problem on some workload.
* Pull a new tree mod seq number for our operation.
*/
static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info)
static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
{
return atomic64_inc_return(&fs_info->tree_mod_seq);
}

/*
* return the last minor in the previous major tree_mod_seq number
*/
u64 btrfs_tree_mod_seq_prev(u64 seq)
{
return (seq & 0xffffffff00000000ull) - 1ull;
}

/*
* This adds a new blocker to the tree mod log's blocker list if the @elem
* passed does not already have a sequence number set. So when a caller expects
Expand All @@ -404,19 +374,16 @@ u64 btrfs_tree_mod_seq_prev(u64 seq)
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem)
{
u64 seq;

tree_mod_log_write_lock(fs_info);
spin_lock(&fs_info->tree_mod_seq_lock);
if (!elem->seq) {
elem->seq = btrfs_inc_tree_mod_seq_major(fs_info);
elem->seq = btrfs_inc_tree_mod_seq(fs_info);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
}
seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
tree_mod_log_write_unlock(fs_info);

return seq;
return elem->seq;
}

void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
Expand Down Expand Up @@ -489,9 +456,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)

BUG_ON(!tm);

spin_lock(&fs_info->tree_mod_seq_lock);
tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
tm->seq = btrfs_inc_tree_mod_seq(fs_info);

tm_root = &fs_info->tree_mod_log;
new = &tm_root->rb_node;
Expand Down
59 changes: 7 additions & 52 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1648,7 +1648,10 @@ struct btrfs_fs_info {

/* holds configuration and tracking. Protected by qgroup_lock */
struct rb_root qgroup_tree;
struct rb_root qgroup_op_tree;
spinlock_t qgroup_lock;
spinlock_t qgroup_op_lock;
atomic_t qgroup_op_seq;

/*
* used to avoid frequently calling ulist_alloc()/ulist_free()
Expand Down Expand Up @@ -3300,17 +3303,17 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref, int for_cow);
struct extent_buffer *buf, int full_backref, int no_quota);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref, int for_cow);
struct extent_buffer *buf, int full_backref, int no_quota);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 flags,
int level, int is_data);
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset, int for_cow);
u64 owner, u64 offset, int no_quota);

int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
Expand All @@ -3322,7 +3325,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset, int for_cow);
u64 root_objectid, u64 owner, u64 offset, int no_quota);

int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
Expand Down Expand Up @@ -3410,7 +3413,6 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int __get_raid_index(u64 flags);

int btrfs_start_nocow_write(struct btrfs_root *root);
void btrfs_end_nocow_write(struct btrfs_root *root);
/* ctree.c */
Expand Down Expand Up @@ -3586,7 +3588,6 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
u64 btrfs_tree_mod_seq_prev(u64 seq);
int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);

/* root-item.c */
Expand Down Expand Up @@ -4094,52 +4095,6 @@ void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err);

/* qgroup.c */
struct qgroup_update {
struct list_head list;
struct btrfs_delayed_ref_node *node;
struct btrfs_delayed_extent_op *extent_op;
};

int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
char *name);
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid);
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit);
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);

void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);

static inline int is_fstree(u64 rootid)
{
if (rootid == BTRFS_FS_TREE_OBJECTID ||
Expand Down
39 changes: 23 additions & 16 deletions fs/btrfs/delayed-ref.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
return -1;
if (ref1->type > ref2->type)
return 1;
if (ref1->no_quota > ref2->no_quota)
return 1;
if (ref1->no_quota < ref2->no_quota)
return -1;
/* merging of sequenced refs is not allowed */
if (compare_seq) {
if (ref1->seq < ref2->seq)
Expand Down Expand Up @@ -635,7 +639,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, int level,
int action, int for_cow)
int action, int no_quota)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_tree_ref *full_ref;
Expand All @@ -645,6 +649,8 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;

if (is_fstree(ref_root))
seq = atomic64_read(&fs_info->tree_mod_seq);
delayed_refs = &trans->transaction->delayed_refs;

/* first set the basic ref node struct up */
Expand All @@ -655,9 +661,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;

if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->no_quota = no_quota;
ref->seq = seq;

full_ref = btrfs_delayed_node_to_tree_ref(ref);
Expand Down Expand Up @@ -697,7 +701,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
u64 offset, int action, int for_cow)
u64 offset, int action, int no_quota)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_data_ref *full_ref;
Expand All @@ -709,6 +713,9 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,

delayed_refs = &trans->transaction->delayed_refs;

if (is_fstree(ref_root))
seq = atomic64_read(&fs_info->tree_mod_seq);

/* first set the basic ref node struct up */
atomic_set(&ref->refs, 1);
ref->bytenr = bytenr;
Expand All @@ -717,9 +724,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;

if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->no_quota = no_quota;
ref->seq = seq;

full_ref = btrfs_delayed_node_to_data_ref(ref);
Expand Down Expand Up @@ -762,12 +767,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
int for_cow)
int no_quota)
{
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;

if (!is_fstree(ref_root) || !fs_info->quota_enabled)
no_quota = 0;

BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref)
Expand All @@ -793,10 +801,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,

add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
no_quota);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);

return 0;
}
Expand All @@ -810,12 +816,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op,
int for_cow)
int no_quota)
{
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;

if (!is_fstree(ref_root) || !fs_info->quota_enabled)
no_quota = 0;

BUG_ON(extent_op && !extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
Expand All @@ -841,10 +850,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,

add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
action, no_quota);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);

return 0;
}
Expand Down
24 changes: 3 additions & 21 deletions fs/btrfs/delayed-ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ struct btrfs_delayed_ref_node {

unsigned int action:8;
unsigned int type:8;
unsigned int no_quota:1;
/* is this node still in the rbtree? */
unsigned int is_head:1;
unsigned int in_tree:1;
Expand Down Expand Up @@ -196,14 +197,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
int for_cow);
int no_quota);
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op,
int for_cow);
int no_quota);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
Expand All @@ -230,25 +231,6 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);

/*
* delayed refs with a ref_seq > 0 must be held back during backref walking.
* this only applies to items in one of the fs-trees. for_cow items never need
* to be held back, so they won't get a ref_seq number.
*/
static inline int need_ref_seq(int for_cow, u64 rootid)
{
if (for_cow)
return 0;

if (rootid == BTRFS_FS_TREE_OBJECTID)
return 1;

if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
return 1;

return 0;
}

/*
* a node might live in a head or a regular ref, this lets you
* test for the proper type to use.
Expand Down
Loading

0 comments on commit fcebe45

Please sign in to comment.