Skip to content

Commit

Permalink
Btrfs: Introduce global metadata reservation
Browse files Browse the repository at this point in the history
Reserve metadata space for extent tree, checksum tree and root tree

Signed-off-by: Yan Zheng <[email protected]>
Signed-off-by: Chris Mason <[email protected]>
  • Loading branch information
Yan, Zheng authored and chrismason-xx committed May 25, 2010
1 parent 0ca1f7c commit 8929ecf
Show file tree
Hide file tree
Showing 8 changed files with 241 additions and 76 deletions.
8 changes: 1 addition & 7 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -683,21 +683,15 @@ struct btrfs_space_info {
u64 bytes_reserved; /* total bytes the allocator has reserved for
current allocations */
u64 bytes_readonly; /* total bytes that are read only */
u64 bytes_super; /* total bytes reserved for the super blocks */
u64 bytes_root; /* the number of bytes needed to commit a
transaction */

u64 bytes_may_use; /* number of bytes that may be used for
delalloc/allocations */
u64 bytes_delalloc; /* number of bytes currently reserved for
delayed allocation */
u64 disk_used; /* total bytes used on disk */

int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
this space */
int force_delalloc; /* make people start doing filemap_flush until
we're under a threshold */

struct list_head list;

Expand Down
59 changes: 29 additions & 30 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1463,10 +1463,6 @@ static int cleaner_kthread(void *arg)
struct btrfs_root *root = arg;

do {
smp_mb();
if (root->fs_info->closing)
break;

vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);

if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
Expand All @@ -1479,11 +1475,9 @@ static int cleaner_kthread(void *arg)
if (freezing(current)) {
refrigerator();
} else {
smp_mb();
if (root->fs_info->closing)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule();
if (!kthread_should_stop())
schedule();
__set_current_state(TASK_RUNNING);
}
} while (!kthread_should_stop());
Expand All @@ -1495,47 +1489,51 @@ static int transaction_kthread(void *arg)
struct btrfs_root *root = arg;
struct btrfs_trans_handle *trans;
struct btrfs_transaction *cur;
u64 transid;
unsigned long now;
unsigned long delay;
int ret;

do {
smp_mb();
if (root->fs_info->closing)
break;

delay = HZ * 30;
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
mutex_lock(&root->fs_info->transaction_kthread_mutex);

mutex_lock(&root->fs_info->trans_mutex);
spin_lock(&root->fs_info->new_trans_lock);
cur = root->fs_info->running_transaction;
if (!cur) {
mutex_unlock(&root->fs_info->trans_mutex);
spin_unlock(&root->fs_info->new_trans_lock);
goto sleep;
}

now = get_seconds();
if (now < cur->start_time || now - cur->start_time < 30) {
mutex_unlock(&root->fs_info->trans_mutex);
if (!cur->blocked &&
(now < cur->start_time || now - cur->start_time < 30)) {
spin_unlock(&root->fs_info->new_trans_lock);
delay = HZ * 5;
goto sleep;
}
mutex_unlock(&root->fs_info->trans_mutex);
trans = btrfs_join_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
transid = cur->transid;
spin_unlock(&root->fs_info->new_trans_lock);

trans = btrfs_join_transaction(root, 1);
if (transid == trans->transid) {
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
} else {
btrfs_end_transaction(trans, root);
}
sleep:
wake_up_process(root->fs_info->cleaner_kthread);
mutex_unlock(&root->fs_info->transaction_kthread_mutex);

if (freezing(current)) {
refrigerator();
} else {
if (root->fs_info->closing)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(delay);
if (!kthread_should_stop() &&
!btrfs_transaction_blocked(root->fs_info))
schedule_timeout(delay);
__set_current_state(TASK_RUNNING);
}
} while (!kthread_should_stop());
Expand Down Expand Up @@ -1917,17 +1915,18 @@ struct btrfs_root *open_ctree(struct super_block *sb,

csum_root->track_dirty = 1;

fs_info->generation = generation;
fs_info->last_trans_committed = generation;
fs_info->data_alloc_profile = (u64)-1;
fs_info->metadata_alloc_profile = (u64)-1;
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;

ret = btrfs_read_block_groups(extent_root);
if (ret) {
printk(KERN_ERR "Failed to read block groups: %d\n", ret);
goto fail_block_groups;
}

fs_info->generation = generation;
fs_info->last_trans_committed = generation;
fs_info->data_alloc_profile = (u64)-1;
fs_info->metadata_alloc_profile = (u64)-1;
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
"btrfs-cleaner");
if (IS_ERR(fs_info->cleaner_kthread))
Expand Down Expand Up @@ -2430,15 +2429,15 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1;
smp_mb();

kthread_stop(root->fs_info->transaction_kthread);
kthread_stop(root->fs_info->cleaner_kthread);

if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
if (ret)
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
}

kthread_stop(root->fs_info->transaction_kthread);
kthread_stop(root->fs_info->cleaner_kthread);

fs_info->closing = 2;
smp_mb();

Expand Down
147 changes: 128 additions & 19 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -2895,10 +2895,9 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
again:
/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc +
data_sinfo->bytes_reserved + data_sinfo->bytes_pinned +
data_sinfo->bytes_readonly + data_sinfo->bytes_may_use +
data_sinfo->bytes_super;
used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
data_sinfo->bytes_may_use;

if (used + bytes > data_sinfo->total_bytes) {
struct btrfs_trans_handle *trans;
Expand All @@ -2922,7 +2921,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
bytes + 2 * 1024 * 1024,
alloc_target, 0);
btrfs_end_transaction(trans, root);
if (ret)
if (ret < 0)
return ret;

if (!data_sinfo) {
Expand All @@ -2945,11 +2944,10 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
goto again;
}

printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
", %llu bytes_used, %llu bytes_reserved, "
"%llu bytes_pinned, %llu bytes_readonly, %llu may use "
"%llu total\n", (unsigned long long)bytes,
(unsigned long long)data_sinfo->bytes_delalloc,
printk(KERN_ERR "no space left, need %llu, %llu bytes_used, "
"%llu bytes_reserved, " "%llu bytes_pinned, "
"%llu bytes_readonly, %llu may use %llu total\n",
(unsigned long long)bytes,
(unsigned long long)data_sinfo->bytes_used,
(unsigned long long)data_sinfo->bytes_reserved,
(unsigned long long)data_sinfo->bytes_pinned,
Expand Down Expand Up @@ -3464,6 +3462,91 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
block_rsv_release_bytes(block_rsv, global_rsv, num_bytes);
}

/*
 * Estimate the size the global block reservation should have.
 *
 * The reservation is meant to cover the space used by the extent tree,
 * the checksum tree and the root tree.  Per-tree used-space accounting
 * can be inaccurate, so it is not relied upon; the figure is instead
 * derived from the bytes_used counters of the DATA and METADATA space
 * infos:
 *
 *   - twice the checksum size for every block of used data, plus
 *   - 1/50th (2%) of the used data and metadata bytes combined,
 *
 * capped at one third of the used metadata bytes.  The result is passed
 * through ALIGN() with an alignment of (leafsize << 10).
 *
 * Each space info's lock is held only while its counter is sampled, so
 * the two samples are not taken atomically with respect to each other.
 */
static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
{
	struct btrfs_space_info *space;
	int csum_bytes = btrfs_super_csum_size(&fs_info->super_copy);
	u64 data_bytes;
	u64 meta_bytes;
	u64 data_blocks;
	u64 estimate;

	/* sample how much data space is in use */
	space = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
	spin_lock(&space->lock);
	data_bytes = space->bytes_used;
	spin_unlock(&space->lock);

	/* sample how much metadata space is in use */
	space = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	spin_lock(&space->lock);
	meta_bytes = space->bytes_used;
	spin_unlock(&space->lock);

	/* checksum share: two checksums' worth per used data block */
	data_blocks = data_bytes >> fs_info->sb->s_blocksize_bits;
	estimate = data_blocks * csum_bytes * 2;

	/* plus 2% of everything currently in use */
	estimate += div64_u64(data_bytes + meta_bytes, 50);

	/* never ask for more than a third of the used metadata */
	if (estimate * 3 > meta_bytes)
		estimate = div64_u64(meta_bytes, 3);

	return ALIGN(estimate, fs_info->extent_root->leafsize << 10);
}

/*
 * Resize and refill the global block reservation.
 *
 * The target size comes from calc_global_metadata_size().  With both the
 * reservation lock and its space info lock held (taken in that order):
 *
 *   1. the reservation's size is set to the new target;
 *   2. any space in the space info that is not already counted as used,
 *      pinned, reserved or read-only is added to the reservation and to
 *      the space info's bytes_reserved;
 *   3. if the reservation now holds at least its size, the surplus is
 *      handed back to the space info and the reservation is marked full.
 *
 * NOTE(review): 'full' is only ever set here; when reserved ends up below
 * size it is left at its previous value.  Confirm that another path
 * clears it when the reservation is no longer full.
 */
static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *rsv = &fs_info->global_block_rsv;
	struct btrfs_space_info *space = rsv->space_info;
	u64 target = calc_global_metadata_size(fs_info);
	u64 accounted;
	u64 delta;

	spin_lock(&rsv->lock);
	spin_lock(&space->lock);

	rsv->size = target;

	/* everything the space info already has spoken for */
	accounted = space->bytes_used + space->bytes_pinned +
		    space->bytes_reserved + space->bytes_readonly;

	/* claim whatever is still unaccounted for the global reservation */
	if (space->total_bytes > accounted) {
		delta = space->total_bytes - accounted;
		rsv->reserved += delta;
		space->bytes_reserved += delta;
	}

	/* give back anything beyond the target size */
	if (rsv->reserved >= rsv->size) {
		delta = rsv->reserved - rsv->size;
		space->bytes_reserved -= delta;
		rsv->reserved = rsv->size;
		rsv->full = 1;
	}

	spin_unlock(&space->lock);
	spin_unlock(&rsv->lock);
}

static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
{
struct btrfs_space_info *space_info;
Expand All @@ -3473,11 +3556,36 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
fs_info->chunk_block_rsv.priority = 10;

space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
fs_info->global_block_rsv.space_info = space_info;
fs_info->global_block_rsv.priority = 10;
fs_info->global_block_rsv.refill_used = 1;
fs_info->delalloc_block_rsv.space_info = space_info;
fs_info->trans_block_rsv.space_info = space_info;
fs_info->empty_block_rsv.space_info = space_info;
fs_info->empty_block_rsv.priority = 10;

fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;

btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv);

btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv);

update_global_block_rsv(fs_info);
}

static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1);
WARN_ON(fs_info->delalloc_block_rsv.size > 0);
WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
WARN_ON(fs_info->trans_block_rsv.size > 0);
WARN_ON(fs_info->trans_block_rsv.reserved > 0);
WARN_ON(fs_info->chunk_block_rsv.size > 0);
WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
}

static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
Expand Down Expand Up @@ -3826,6 +3934,8 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
fs_info->pinned_extents = &fs_info->freed_extents[0];

up_write(&fs_info->extent_commit_sem);

update_global_block_rsv(fs_info);
return 0;
}

Expand Down Expand Up @@ -4818,19 +4928,16 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
printk(KERN_INFO "space_info has %llu free, is %sfull\n",
(unsigned long long)(info->total_bytes - info->bytes_used -
info->bytes_pinned - info->bytes_reserved -
info->bytes_super),
info->bytes_readonly),
(info->full) ? "" : "not ");
printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
" may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu"
"\n",
printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, "
"reserved=%llu, may_use=%llu, readonly=%llu\n",
(unsigned long long)info->total_bytes,
(unsigned long long)info->bytes_used,
(unsigned long long)info->bytes_pinned,
(unsigned long long)info->bytes_delalloc,
(unsigned long long)info->bytes_reserved,
(unsigned long long)info->bytes_may_use,
(unsigned long long)info->bytes_used,
(unsigned long long)info->bytes_root,
(unsigned long long)info->bytes_super,
(unsigned long long)info->bytes_reserved);
(unsigned long long)info->bytes_readonly);
spin_unlock(&info->lock);

if (!dump_block_groups)
Expand Down Expand Up @@ -7727,6 +7834,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
*/
synchronize_rcu();

release_global_block_rsv(info);

while(!list_empty(&info->space_info)) {
space_info = list_entry(info->space_info.next,
struct btrfs_space_info,
Expand Down
13 changes: 11 additions & 2 deletions fs/btrfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -4060,7 +4060,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
struct btrfs_trans_handle *trans;
int ret = 0;

if (root->fs_info->btree_inode == inode)
if (BTRFS_I(inode)->dummy_inode)
return 0;

if (wbc->sync_mode == WB_SYNC_ALL) {
Expand All @@ -4081,10 +4081,19 @@ void btrfs_dirty_inode(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret;

if (BTRFS_I(inode)->dummy_inode)
return;

trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
btrfs_update_inode(trans, root, inode);

ret = btrfs_update_inode(trans, root, inode);
if (ret)
printk(KERN_ERR"btrfs: fail to dirty inode %lu error %d\n",
inode->i_ino, ret);

btrfs_end_transaction(trans, root);
}

Expand Down
Loading

0 comments on commit 8929ecf

Please sign in to comment.