Skip to content

Commit

Permalink
Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Browse files Browse the repository at this point in the history

Pull btrfs updates from Chris Mason:
 "This has our merge window series of cleanups and fixes.  These target
  a wide range of issues, but do include some important fixes for
  qgroups, O_DIRECT, and fsync handling.  Jeff Mahoney moved around a
  few definitions to make them easier for userland to consume.

  Also whiteout support is included now that issues with overlayfs have
  been cleared up.

  I have one more fix pending for page faults during btrfs_copy_from_user,
  but I wanted to get this bulk out the door first"

* 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (90 commits)
  btrfs: fix memory leak during RAID 5/6 device replacement
  Btrfs: add semaphore to synchronize direct IO writes with fsync
  Btrfs: fix race between block group relocation and nocow writes
  Btrfs: fix race between fsync and direct IO writes for prealloc extents
  Btrfs: fix number of transaction units for renames with whiteout
  Btrfs: pin logs earlier when doing a rename exchange operation
  Btrfs: unpin logs if rename exchange operation fails
  Btrfs: fix inode leak on failure to setup whiteout inode in rename
  btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT
  Btrfs: pin log earlier when renaming
  Btrfs: unpin log if rename operation fails
  Btrfs: don't do unnecessary delalloc flushes when relocating
  Btrfs: don't wait for unrelated IO to finish before relocation
  Btrfs: fix empty symlink after creating symlink and fsync parent dir
  Btrfs: fix for incorrect directory entries after fsync log replay
  btrfs: build fixup for qgroup_account_snapshot
  btrfs: qgroup: Fix qgroup accounting when creating snapshot
  Btrfs: fix fspath error deallocation
  btrfs: make find_workspace warn if there are no workspaces
  btrfs: make find_workspace always succeed
  ...
  • Loading branch information
torvalds committed May 21, 2016
2 parents 63d222b + c315ef8 commit 07be133
Show file tree
Hide file tree
Showing 30 changed files with 2,678 additions and 1,794 deletions.
2 changes: 1 addition & 1 deletion fs/btrfs/backref.c
Original file line number Diff line number Diff line change
Expand Up @@ -1991,7 +1991,7 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,

ifp = kmalloc(sizeof(*ifp), GFP_NOFS);
if (!ifp) {
kfree(fspath);
vfree(fspath);
return ERR_PTR(-ENOMEM);
}

Expand Down
10 changes: 10 additions & 0 deletions fs/btrfs/btrfs_inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,16 @@ struct btrfs_inode {
struct list_head delayed_iput;
long delayed_iput_count;

/*
* Used to avoid races between lockless (i_mutex not held) direct IO
* writes and concurrent fsync requests. Direct IO writes must acquire read
* access on this semaphore for creating an extent map and its
* corresponding ordered extent. The fast fsync path must acquire write
* access on this semaphore before it collects ordered extents and
* extent maps.
*/
struct rw_semaphore dio_sem;

struct inode vfs_inode;
};

Expand Down
85 changes: 61 additions & 24 deletions fs/btrfs/compression.c
Original file line number Diff line number Diff line change
Expand Up @@ -743,8 +743,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
static struct {
struct list_head idle_ws;
spinlock_t ws_lock;
int num_ws;
atomic_t alloc_ws;
/* Number of free workspaces */
int free_ws;
/* Total number of allocated workspaces */
atomic_t total_ws;
/* Waiters for a free workspace */
wait_queue_head_t ws_wait;
} btrfs_comp_ws[BTRFS_COMPRESS_TYPES];

Expand All @@ -758,16 +761,34 @@ void __init btrfs_init_compress(void)
int i;

for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
struct list_head *workspace;

INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
spin_lock_init(&btrfs_comp_ws[i].ws_lock);
atomic_set(&btrfs_comp_ws[i].alloc_ws, 0);
atomic_set(&btrfs_comp_ws[i].total_ws, 0);
init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);

/*
* Preallocate one workspace for each compression type so
* we can guarantee forward progress in the worst case
*/
workspace = btrfs_compress_op[i]->alloc_workspace();
if (IS_ERR(workspace)) {
printk(KERN_WARNING
"BTRFS: cannot preallocate compression workspace, will try later");
} else {
atomic_set(&btrfs_comp_ws[i].total_ws, 1);
btrfs_comp_ws[i].free_ws = 1;
list_add(workspace, &btrfs_comp_ws[i].idle_ws);
}
}
}

/*
* this finds an available workspace or allocates a new one
* ERR_PTR is returned if things go bad.
* This finds an available workspace or allocates a new one.
* If it's not possible to allocate a new one, waits until there's one.
* Preallocation provides a forward progress guarantee, so we do not return
* errors.
*/
static struct list_head *find_workspace(int type)
{
Expand All @@ -777,36 +798,58 @@ static struct list_head *find_workspace(int type)

struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
atomic_t *alloc_ws = &btrfs_comp_ws[idx].alloc_ws;
atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws;
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
int *num_ws = &btrfs_comp_ws[idx].num_ws;
int *free_ws = &btrfs_comp_ws[idx].free_ws;
again:
spin_lock(ws_lock);
if (!list_empty(idle_ws)) {
workspace = idle_ws->next;
list_del(workspace);
(*num_ws)--;
(*free_ws)--;
spin_unlock(ws_lock);
return workspace;

}
if (atomic_read(alloc_ws) > cpus) {
if (atomic_read(total_ws) > cpus) {
DEFINE_WAIT(wait);

spin_unlock(ws_lock);
prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
if (atomic_read(alloc_ws) > cpus && !*num_ws)
if (atomic_read(total_ws) > cpus && !*free_ws)
schedule();
finish_wait(ws_wait, &wait);
goto again;
}
atomic_inc(alloc_ws);
atomic_inc(total_ws);
spin_unlock(ws_lock);

workspace = btrfs_compress_op[idx]->alloc_workspace();
if (IS_ERR(workspace)) {
atomic_dec(alloc_ws);
atomic_dec(total_ws);
wake_up(ws_wait);

/*
* Do not return the error but go back to waiting. There's a
* workspace preallocated for each type and the compression
* time is bounded so we get to a workspace eventually. This
* makes our caller's life easier.
*
* To prevent silent and low-probability deadlocks (when the
* initial preallocation fails), check if there are any
* workspaces at all.
*/
if (atomic_read(total_ws) == 0) {
static DEFINE_RATELIMIT_STATE(_rs,
/* once per minute */ 60 * HZ,
/* no burst */ 1);

if (__ratelimit(&_rs)) {
printk(KERN_WARNING
"no compression workspaces, low memory, retrying");
}
}
goto again;
}
return workspace;
}
Expand All @@ -820,21 +863,21 @@ static void free_workspace(int type, struct list_head *workspace)
int idx = type - 1;
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
atomic_t *alloc_ws = &btrfs_comp_ws[idx].alloc_ws;
atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws;
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
int *num_ws = &btrfs_comp_ws[idx].num_ws;
int *free_ws = &btrfs_comp_ws[idx].free_ws;

spin_lock(ws_lock);
if (*num_ws < num_online_cpus()) {
if (*free_ws < num_online_cpus()) {
list_add(workspace, idle_ws);
(*num_ws)++;
(*free_ws)++;
spin_unlock(ws_lock);
goto wake;
}
spin_unlock(ws_lock);

btrfs_compress_op[idx]->free_workspace(workspace);
atomic_dec(alloc_ws);
atomic_dec(total_ws);
wake:
/*
* Make sure counter is updated before we wake up waiters.
Expand All @@ -857,7 +900,7 @@ static void free_workspaces(void)
workspace = btrfs_comp_ws[i].idle_ws.next;
list_del(workspace);
btrfs_compress_op[i]->free_workspace(workspace);
atomic_dec(&btrfs_comp_ws[i].alloc_ws);
atomic_dec(&btrfs_comp_ws[i].total_ws);
}
}
}
Expand Down Expand Up @@ -894,8 +937,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
int ret;

workspace = find_workspace(type);
if (IS_ERR(workspace))
return PTR_ERR(workspace);

ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
start, len, pages,
Expand Down Expand Up @@ -930,8 +971,6 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
int ret;

workspace = find_workspace(type);
if (IS_ERR(workspace))
return PTR_ERR(workspace);

ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
disk_start,
Expand All @@ -952,8 +991,6 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
int ret;

workspace = find_workspace(type);
if (IS_ERR(workspace))
return PTR_ERR(workspace);

ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
dest_page, start_byte,
Expand Down
6 changes: 3 additions & 3 deletions fs/btrfs/ctree.c
Original file line number Diff line number Diff line change
Expand Up @@ -1011,7 +1011,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
return ret;
if (refs == 0) {
ret = -EROFS;
btrfs_std_error(root->fs_info, ret, NULL);
btrfs_handle_fs_error(root->fs_info, ret, NULL);
return ret;
}
} else {
Expand Down Expand Up @@ -1928,7 +1928,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
child = read_node_slot(root, mid, 0);
if (!child) {
ret = -EROFS;
btrfs_std_error(root->fs_info, ret, NULL);
btrfs_handle_fs_error(root->fs_info, ret, NULL);
goto enospc;
}

Expand Down Expand Up @@ -2031,7 +2031,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
*/
if (!left) {
ret = -EROFS;
btrfs_std_error(root->fs_info, ret, NULL);
btrfs_handle_fs_error(root->fs_info, ret, NULL);
goto enospc;
}
wret = balance_node_right(trans, root, mid, left);
Expand Down
Loading

0 comments on commit 07be133

Please sign in to comment.