Skip to content

Commit

Permalink
Merge tag 'xfs-4.20-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/x…
Browse files Browse the repository at this point in the history
…fs-linux

Pull vfs dedup fixes from Dave Chinner:
 "This reworks the vfs data cloning infrastructure.

  We discovered many issues with these interfaces late in the 4.19 cycle
  - the worst of them (data corruption, setuid stripping) were fixed for
  XFS in 4.19-rc8, but a larger rework of the infrastructure fixing all
  the problems was needed. That rework is the contents of this pull
  request.

  Rework the vfs_clone_file_range and vfs_dedupe_file_range
  infrastructure to use a common .remap_file_range method and supply
  generic bounds and sanity checking functions that are shared with the
  data write path. The current VFS infrastructure has problems with
  rlimit, LFS file sizes, file time stamps, maximum filesystem file
  sizes, stripping setuid bits, etc and so they are addressed in these
  commits.

  We also introduce the ability for the ->remap_file_range methods to
  return short clones so that clones for vfs_copy_file_range() don't get
  rejected if the entire range can't be cloned. It also allows
  filesystems to silently skip deduplication of partial EOF blocks if
  they are not capable of doing so without requiring errors to be thrown
  to userspace.

  Existing filesystems are converted to use the new remap_file_range
  method, and both XFS and ocfs2 are modified to make use of the new
  generic checking infrastructure"

* tag 'xfs-4.20-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (28 commits)
  xfs: remove [cm]time update from reflink calls
  xfs: remove xfs_reflink_remap_range
  xfs: remove redundant remap partial EOF block checks
  xfs: support returning partial reflink results
  xfs: clean up xfs_reflink_remap_blocks call site
  xfs: fix pagecache truncation prior to reflink
  ocfs2: remove ocfs2_reflink_remap_range
  ocfs2: support partial clone range and dedupe range
  ocfs2: fix pagecache truncation prior to reflink
  ocfs2: truncate page cache for clone destination file before remapping
  vfs: clean up generic_remap_file_range_prep return value
  vfs: hide file range comparison function
  vfs: enable remap callers that can handle short operations
  vfs: plumb remap flags through the vfs dedupe functions
  vfs: plumb remap flags through the vfs clone functions
  vfs: make remap_file_range functions take and return bytes completed
  vfs: remap helper should update destination inode metadata
  vfs: pass remap flags to generic_remap_checks
  vfs: pass remap flags to generic_remap_file_range_prep
  vfs: combine the clone and dedupe into a single remap_file_range
  ...
  • Loading branch information
torvalds committed Nov 2, 2018
2 parents b69f9e1 + bf4a1fc commit c2aa1a4
Show file tree
Hide file tree
Showing 20 changed files with 735 additions and 597 deletions.
5 changes: 5 additions & 0 deletions Documentation/filesystems/porting
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,11 @@ in your dentry operations instead.
On success you get a new struct file sharing the mount/dentry with the
original, on failure - ERR_PTR().
--
[mandatory]
->clone_file_range() and ->dedupe_file_range() have been replaced with
->remap_file_range(). See Documentation/filesystems/vfs.txt for more
information.
--
[recommended]
->lookup() instances doing an equivalent of
if (IS_ERR(inode))
Expand Down
22 changes: 15 additions & 7 deletions Documentation/filesystems/vfs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -883,8 +883,9 @@ struct file_operations {
unsigned (*mmap_capabilities)(struct file *);
#endif
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
};

Expand Down Expand Up @@ -960,11 +961,18 @@ otherwise noted.

copy_file_range: called by the copy_file_range(2) system call.

clone_file_range: called by the ioctl(2) system call for FICLONERANGE and
FICLONE commands.

dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE
command.
remap_file_range: called by the ioctl(2) system call for the FICLONERANGE,
FICLONE, and FIDEDUPERANGE commands to remap file ranges. An
implementation should remap len bytes at pos_in of the source file into
the dest file at pos_out. Implementations must handle callers passing
in len == 0; this means "remap to the end of the source file". The
return value should be the number of bytes remapped, or the usual
negative error code if errors occurred before any bytes were remapped.
The remap_flags parameter accepts REMAP_FILE_* flags. If
REMAP_FILE_DEDUP is set then the implementation must only remap if the
requested file ranges have identical contents. If REMAP_FILE_CAN_SHORTEN is
set, the caller is ok with the implementation shortening the request
length to satisfy alignment or EOF requirements (or any other reason).

fadvise: possibly called by the fadvise64() system call.

Expand Down
8 changes: 3 additions & 5 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -3201,9 +3201,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs);
int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
struct file *dst_file, loff_t dst_loff,
u64 olen);

/* file.c */
int __init btrfs_auto_defrag_init(void);
Expand Down Expand Up @@ -3233,8 +3230,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
size_t num_pages, loff_t pos, size_t write_bytes,
struct extent_state **cached);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len);
loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);

/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
Expand Down
3 changes: 1 addition & 2 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -3298,8 +3298,7 @@ const struct file_operations btrfs_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_compat_ioctl,
#endif
.clone_file_range = btrfs_clone_file_range,
.dedupe_file_range = btrfs_dedupe_file_range,
.remap_file_range = btrfs_remap_file_range,
};

void __cold btrfs_auto_defrag_exit(void)
Expand Down
50 changes: 27 additions & 23 deletions fs/btrfs/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -3629,26 +3629,6 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
return ret;
}

int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
struct file *dst_file, loff_t dst_loff,
u64 olen)
{
struct inode *src = file_inode(src_file);
struct inode *dst = file_inode(dst_file);
u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;

if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
/*
* Btrfs does not support blocksize < page_size. As a
* result, btrfs_cmp_data() won't correctly handle
* this situation without an update.
*/
return -EINVAL;
}

return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
}

static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
struct inode *inode,
u64 endoff,
Expand Down Expand Up @@ -4350,10 +4330,34 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
return ret;
}

int btrfs_clone_file_range(struct file *src_file, loff_t off,
struct file *dst_file, loff_t destoff, u64 len)
loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
struct file *dst_file, loff_t destoff, loff_t len,
unsigned int remap_flags)
{
return btrfs_clone_files(dst_file, src_file, off, len, destoff);
int ret;

if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
return -EINVAL;

if (remap_flags & REMAP_FILE_DEDUP) {
struct inode *src = file_inode(src_file);
struct inode *dst = file_inode(dst_file);
u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;

if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
/*
* Btrfs does not support blocksize < page_size. As a
* result, btrfs_cmp_data() won't correctly handle
* this situation without an update.
*/
return -EINVAL;
}

ret = btrfs_extent_same(src, off, len, dst, destoff);
} else {
ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);
}
return ret < 0 ? ret : len;
}

static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
Expand Down
24 changes: 14 additions & 10 deletions fs/cifs/cifsfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -992,8 +992,9 @@ const struct inode_operations cifs_symlink_inode_ops = {
.listxattr = cifs_listxattr,
};

static int cifs_clone_file_range(struct file *src_file, loff_t off,
struct file *dst_file, loff_t destoff, u64 len)
static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
struct file *dst_file, loff_t destoff, loff_t len,
unsigned int remap_flags)
{
struct inode *src_inode = file_inode(src_file);
struct inode *target_inode = file_inode(dst_file);
Expand All @@ -1003,6 +1004,9 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off,
unsigned int xid;
int rc;

if (remap_flags & ~REMAP_FILE_ADVISORY)
return -EINVAL;

cifs_dbg(FYI, "clone range\n");

xid = get_xid();
Expand Down Expand Up @@ -1042,7 +1046,7 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off,
unlock_two_nondirectories(src_inode, target_inode);
out:
free_xid(xid);
return rc;
return rc < 0 ? rc : len;
}

ssize_t cifs_file_copychunk_range(unsigned int xid,
Expand Down Expand Up @@ -1151,7 +1155,7 @@ const struct file_operations cifs_file_ops = {
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
.remap_file_range = cifs_remap_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
Expand All @@ -1170,7 +1174,7 @@ const struct file_operations cifs_file_strict_ops = {
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
.remap_file_range = cifs_remap_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
Expand All @@ -1189,7 +1193,7 @@ const struct file_operations cifs_file_direct_ops = {
.splice_write = iter_file_splice_write,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
.remap_file_range = cifs_remap_file_range,
.llseek = cifs_llseek,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
Expand All @@ -1208,7 +1212,7 @@ const struct file_operations cifs_file_nobrl_ops = {
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
.remap_file_range = cifs_remap_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
Expand All @@ -1226,7 +1230,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
.remap_file_range = cifs_remap_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
Expand All @@ -1244,7 +1248,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
.splice_write = iter_file_splice_write,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
.remap_file_range = cifs_remap_file_range,
.llseek = cifs_llseek,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
Expand All @@ -1256,7 +1260,7 @@ const struct file_operations cifs_dir_ops = {
.read = generic_read_dir,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
.remap_file_range = cifs_remap_file_range,
.llseek = generic_file_llseek,
.fsync = cifs_dir_fsync,
};
Expand Down
10 changes: 9 additions & 1 deletion fs/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,14 +223,22 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
u64 off, u64 olen, u64 destoff)
{
struct fd src_file = fdget(srcfd);
loff_t cloned;
int ret;

if (!src_file.file)
return -EBADF;
ret = -EXDEV;
if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
goto fdput;
ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen);
cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
olen, 0);
if (cloned < 0)
ret = cloned;
else if (olen && cloned != olen)
ret = -EINVAL;
else
ret = 0;
fdput:
fdput(src_file);
return ret;
Expand Down
12 changes: 8 additions & 4 deletions fs/nfs/nfs4file.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,9 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
return nfs42_proc_allocate(filep, offset, len);
}

static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, u64 count)
static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, loff_t count,
unsigned int remap_flags)
{
struct inode *dst_inode = file_inode(dst_file);
struct nfs_server *server = NFS_SERVER(dst_inode);
Expand All @@ -190,6 +191,9 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
bool same_inode = false;
int ret;

if (remap_flags & ~REMAP_FILE_ADVISORY)
return -EINVAL;

/* check alignment w.r.t. clone_blksize */
ret = -EINVAL;
if (bs) {
Expand Down Expand Up @@ -240,7 +244,7 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
inode_unlock(src_inode);
}
out:
return ret;
return ret < 0 ? ret : count;
}
#endif /* CONFIG_NFS_V4_2 */

Expand All @@ -262,7 +266,7 @@ const struct file_operations nfs4_file_operations = {
.copy_file_range = nfs4_copy_file_range,
.llseek = nfs4_file_llseek,
.fallocate = nfs42_fallocate,
.clone_file_range = nfs42_clone_file_range,
.remap_file_range = nfs42_remap_file_range,
#else
.llseek = nfs_file_llseek,
#endif
Expand Down
8 changes: 6 additions & 2 deletions fs/nfsd/vfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,8 +541,12 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
u64 dst_pos, u64 count)
{
return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
count));
loff_t cloned;

cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
if (count && cloned != count)
cloned = -EINVAL;
return nfserrno(cloned < 0 ? cloned : 0);
}

ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
Expand Down
Loading

0 comments on commit c2aa1a4

Please sign in to comment.