Skip to content

Commit

Permalink
vfs: refactor clone/dedupe_file_range common functions
Browse files Browse the repository at this point in the history
Hoist both the XFS reflink inode state and preparation code and the XFS
file blocks compare functions into the VFS so that ocfs2 can take
advantage of it for reflink and dedupe.

Signed-off-by: Darrick J. Wong <[email protected]>
  • Loading branch information
djwong committed Dec 10, 2016
1 parent a76b5b0 commit 876bec6
Show file tree
Hide file tree
Showing 3 changed files with 219 additions and 204 deletions.
204 changes: 204 additions & 0 deletions fs/read_write.c
Original file line number Diff line number Diff line change
Expand Up @@ -1667,6 +1667,114 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
}

/*
* Check that the two inodes are eligible for cloning, the ranges make
* sense, and then flush all dirty data. Caller must ensure that the
* inodes have been locked against any other modifications.
*/
int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
struct inode *inode_out, loff_t pos_out,
u64 *len, bool is_dedupe)
{
loff_t bs = inode_out->i_sb->s_blocksize;
loff_t blen;
loff_t isize;
bool same_inode = (inode_in == inode_out);
int ret;

/* Don't touch certain kinds of inodes */
if (IS_IMMUTABLE(inode_out))
return -EPERM;

if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
return -ETXTBSY;

/* Don't reflink dirs, pipes, sockets... */
if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
return -EISDIR;
if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
return -EINVAL;

/* Are we going all the way to the end? */
isize = i_size_read(inode_in);
if (isize == 0) {
*len = 0;
return 0;
}

/* Zero length dedupe exits immediately; reflink goes to EOF. */
if (*len == 0) {
if (is_dedupe) {
*len = 0;
return 0;
}
*len = isize - pos_in;
}

/* Ensure offsets don't wrap and the input is inside i_size */
if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
pos_in + *len > isize)
return -EINVAL;

/* Don't allow dedupe past EOF in the dest file */
if (is_dedupe) {
loff_t disize;

disize = i_size_read(inode_out);
if (pos_out >= disize || pos_out + *len > disize)
return -EINVAL;
}

/* If we're linking to EOF, continue to the block boundary. */
if (pos_in + *len == isize)
blen = ALIGN(isize, bs) - pos_in;
else
blen = *len;

/* Only reflink if we're aligned to block boundaries */
if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
!IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
return -EINVAL;

/* Don't allow overlapped reflink within the same file */
if (same_inode) {
if (pos_out + blen > pos_in && pos_out < pos_in + blen)
return -EINVAL;
}

/* Wait for the completion of any pending IOs on both files */
inode_dio_wait(inode_in);
if (!same_inode)
inode_dio_wait(inode_out);

ret = filemap_write_and_wait_range(inode_in->i_mapping,
pos_in, pos_in + *len - 1);
if (ret)
return ret;

ret = filemap_write_and_wait_range(inode_out->i_mapping,
pos_out, pos_out + *len - 1);
if (ret)
return ret;

/*
* Check that the extents are the same.
*/
if (is_dedupe) {
bool is_same = false;

ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
inode_out, pos_out, *len, &is_same);
if (ret)
return ret;
if (!is_same)
return -EBADE;
}

return 0;
}
EXPORT_SYMBOL(vfs_clone_file_prep_inodes);

int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len)
{
Expand Down Expand Up @@ -1718,6 +1826,102 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
}
EXPORT_SYMBOL(vfs_clone_file_range);

/*
* Read a page's worth of file data into the page cache. Return the page
* locked.
*/
static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
{
struct address_space *mapping;
struct page *page;
pgoff_t n;

n = offset >> PAGE_SHIFT;
mapping = inode->i_mapping;
page = read_mapping_page(mapping, n, NULL);
if (IS_ERR(page))
return page;
if (!PageUptodate(page)) {
put_page(page);
return ERR_PTR(-EIO);
}
lock_page(page);
return page;
}

/*
* Compare extents of two files to see if they are the same.
* Caller must have locked both inodes to prevent write races.
*/
int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
struct inode *dest, loff_t destoff,
loff_t len, bool *is_same)
{
loff_t src_poff;
loff_t dest_poff;
void *src_addr;
void *dest_addr;
struct page *src_page;
struct page *dest_page;
loff_t cmp_len;
bool same;
int error;

error = -EINVAL;
same = true;
while (len) {
src_poff = srcoff & (PAGE_SIZE - 1);
dest_poff = destoff & (PAGE_SIZE - 1);
cmp_len = min(PAGE_SIZE - src_poff,
PAGE_SIZE - dest_poff);
cmp_len = min(cmp_len, len);
if (cmp_len <= 0)
goto out_error;

src_page = vfs_dedupe_get_page(src, srcoff);
if (IS_ERR(src_page)) {
error = PTR_ERR(src_page);
goto out_error;
}
dest_page = vfs_dedupe_get_page(dest, destoff);
if (IS_ERR(dest_page)) {
error = PTR_ERR(dest_page);
unlock_page(src_page);
put_page(src_page);
goto out_error;
}
src_addr = kmap_atomic(src_page);
dest_addr = kmap_atomic(dest_page);

flush_dcache_page(src_page);
flush_dcache_page(dest_page);

if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
same = false;

kunmap_atomic(dest_addr);
kunmap_atomic(src_addr);
unlock_page(dest_page);
unlock_page(src_page);
put_page(dest_page);
put_page(src_page);

if (!same)
break;

srcoff += cmp_len;
destoff += cmp_len;
len -= cmp_len;
}

*is_same = same;
return 0;

out_error:
return error;
}
EXPORT_SYMBOL(vfs_dedupe_file_range_compare);

int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
{
struct file_dedupe_range_info *info;
Expand Down
Loading

0 comments on commit 876bec6

Please sign in to comment.