Skip to content

Commit

Permalink
fs: push i_mutex and filemap_write_and_wait down into ->fsync() handlers
Browse files Browse the repository at this point in the history
Btrfs needs to be able to control how filemap_write_and_wait_range() is called
in fsync to make it less of a painful operation, so push down taking i_mutex and
the calling of filemap_write_and_wait() down into the ->fsync() handlers.  Some
file systems can drop taking the i_mutex altogether it seems, like ext3 and
ocfs2.  For correctness sake I just pushed everything down in all cases to make
sure that we keep the current behavior the same for everybody, and then each
individual fs maintainer can make up their mind about what to do from there.
Thanks,

Acked-by: Jan Kara <[email protected]>
Signed-off-by: Josef Bacik <[email protected]>
Signed-off-by: Al Viro <[email protected]>
  • Loading branch information
Josef Bacik authored and Al Viro committed Jul 21, 2011
1 parent 2273506 commit 02c24a8
Show file tree
Hide file tree
Showing 71 changed files with 462 additions and 164 deletions.
6 changes: 2 additions & 4 deletions Documentation/filesystems/Locking
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ prototypes:
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *);
int (*release) (struct inode *, struct file *);
int (*fsync) (struct file *, int datasync);
int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
Expand All @@ -438,9 +438,7 @@ prototypes:

locking rules:
All may block except for ->setlease.
No VFS locks held on entry except for ->fsync and ->setlease.

->fsync() has i_mutex on inode.
No VFS locks held on entry except for ->setlease.

->setlease has the file_list_lock held and must not sleep.

Expand Down
7 changes: 7 additions & 0 deletions Documentation/filesystems/porting
Original file line number Diff line number Diff line change
Expand Up @@ -421,3 +421,10 @@ data and there is a virtual hole at the end of the file. So if the provided
offset is less than i_size and SEEK_DATA is specified, return the same offset.
If the above is true for the offset and you are given SEEK_HOLE, return the end
of the file. If the offset is i_size or greater return -ENXIO in either case.

[mandatory]
If you have your own ->fsync() you must make sure to call
filemap_write_and_wait_range() so that all dirty pages are synced out properly.
You must also keep in mind that ->fsync() is not called with i_mutex held
anymore, so if you require i_mutex locking you must make sure to take it and
release it yourself.
2 changes: 1 addition & 1 deletion Documentation/filesystems/vfs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,7 @@ struct file_operations {
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *);
int (*release) (struct inode *, struct file *);
int (*fsync) (struct file *, int datasync);
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
Expand Down
11 changes: 9 additions & 2 deletions arch/powerpc/platforms/cell/spufs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1850,9 +1850,16 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id)
return ret;
}

static int spufs_mfc_fsync(struct file *file, int datasync)
static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
return spufs_mfc_flush(file, NULL);
struct inode *inode = file->f_path.dentry->d_inode;
int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (!err) {
mutex_lock(&inode->i_mutex);
err = spufs_mfc_flush(file, NULL);
mutex_unlock(&inode->i_mutex);
}
return err;
}

static int spufs_mfc_fasync(int fd, struct file *file, int on)
Expand Down
9 changes: 7 additions & 2 deletions drivers/char/ps3flash.c
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,14 @@ static int ps3flash_flush(struct file *file, fl_owner_t id)
return ps3flash_writeback(ps3flash_dev);
}

static int ps3flash_fsync(struct file *file, int datasync)
static int ps3flash_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
return ps3flash_writeback(ps3flash_dev);
struct inode *inode = file->f_path.dentry->d_inode;
int err;
mutex_lock(&inode->i_mutex);
err = ps3flash_writeback(ps3flash_dev);
mutex_unlock(&inode->i_mutex);
return err;
}

static irqreturn_t ps3flash_interrupt(int irq, void *data)
Expand Down
10 changes: 7 additions & 3 deletions drivers/mtd/ubi/cdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,16 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin)
return new_offset;
}

static int vol_cdev_fsync(struct file *file, int datasync)
static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct ubi_volume_desc *desc = file->private_data;
struct ubi_device *ubi = desc->vol->ubi;

return ubi_sync(ubi->ubi_num);
struct inode *inode = file->f_path.dentry->d_inode;
int err;
mutex_lock(&inode->i_mutex);
err = ubi_sync(ubi->ubi_num);
mutex_unlock(&inode->i_mutex);
return err;
}


Expand Down
11 changes: 8 additions & 3 deletions drivers/staging/pohmelfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -887,11 +887,16 @@ static struct inode *pohmelfs_alloc_inode(struct super_block *sb)
/*
* We want fsync() to work on POHMELFS.
*/
static int pohmelfs_fsync(struct file *file, int datasync)
static int pohmelfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;

return sync_inode_metadata(inode, 1);
int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (!err) {
mutex_lock(&inode->i_mutex);
err = sync_inode_metadata(inode, 1);
mutex_unlock(&inode->i_mutex);
}
return err;
}

ssize_t pohmelfs_write(struct file *file, const char __user *buf,
Expand Down
5 changes: 4 additions & 1 deletion drivers/usb/gadget/printer.c
Original file line number Diff line number Diff line change
Expand Up @@ -795,12 +795,14 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
}

static int
printer_fsync(struct file *fd, int datasync)
printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
{
struct printer_dev *dev = fd->private_data;
struct inode *inode = fd->f_path.dentry->d_inode;
unsigned long flags;
int tx_list_empty;

mutex_lock(&inode->i_mutex);
spin_lock_irqsave(&dev->lock, flags);
tx_list_empty = (likely(list_empty(&dev->tx_reqs)));
spin_unlock_irqrestore(&dev->lock, flags);
Expand All @@ -810,6 +812,7 @@ printer_fsync(struct file *fd, int datasync)
wait_event_interruptible(dev->tx_flush_wait,
(likely(list_empty(&dev->tx_reqs_active))));
}
mutex_unlock(&inode->i_mutex);

return 0;
}
Expand Down
11 changes: 9 additions & 2 deletions drivers/video/fb_defio.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,26 @@ static int fb_deferred_io_fault(struct vm_area_struct *vma,
return 0;
}

int fb_deferred_io_fsync(struct file *file, int datasync)
int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct fb_info *info = file->private_data;
struct inode *inode = file->f_path.dentry->d_inode;
int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (err)
return err;

/* Skip if deferred io is compiled-in but disabled on this fbdev */
if (!info->fbdefio)
return 0;

mutex_lock(&inode->i_mutex);
/* Kill off the delayed work */
cancel_delayed_work_sync(&info->deferred_work);

/* Run it immediately */
return schedule_delayed_work(&info->deferred_work, 0);
err = schedule_delayed_work(&info->deferred_work, 0);
mutex_unlock(&inode->i_mutex);
return err;
}
EXPORT_SYMBOL_GPL(fb_deferred_io_fsync);

Expand Down
3 changes: 2 additions & 1 deletion fs/9p/v9fs_vfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
void v9fs_blank_wstat(struct p9_wstat *wstat);
int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
int v9fs_file_fsync_dotl(struct file *filp, int datasync);
int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
int datasync);
ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
const char __user *, size_t, loff_t *, int);
int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
Expand Down
22 changes: 20 additions & 2 deletions fs/9p/vfs_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -519,32 +519,50 @@ v9fs_file_write(struct file *filp, const char __user * data,
}


static int v9fs_file_fsync(struct file *filp, int datasync)
static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
int datasync)
{
struct p9_fid *fid;
struct inode *inode = filp->f_mapping->host;
struct p9_wstat wstat;
int retval;

retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (retval)
return retval;

mutex_lock(&inode->i_mutex);
P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);

fid = filp->private_data;
v9fs_blank_wstat(&wstat);

retval = p9_client_wstat(fid, &wstat);
mutex_unlock(&inode->i_mutex);

return retval;
}

int v9fs_file_fsync_dotl(struct file *filp, int datasync)
int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
int datasync)
{
struct p9_fid *fid;
struct inode *inode = filp->f_mapping->host;
int retval;

retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (retval)
return retval;

mutex_lock(&inode->i_mutex);
P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n",
filp, datasync);

fid = filp->private_data;

retval = p9_client_fsync(fid, datasync);
mutex_unlock(&inode->i_mutex);

return retval;
}

Expand Down
2 changes: 1 addition & 1 deletion fs/affs/affs.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent

void affs_free_prealloc(struct inode *inode);
extern void affs_truncate(struct inode *);
int affs_file_fsync(struct file *, int);
int affs_file_fsync(struct file *, loff_t, loff_t, int);

/* dir.c */

Expand Down
8 changes: 7 additions & 1 deletion fs/affs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -923,14 +923,20 @@ affs_truncate(struct inode *inode)
affs_free_prealloc(inode);
}

int affs_file_fsync(struct file *filp, int datasync)
int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
struct inode *inode = filp->f_mapping->host;
int ret, err;

err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (err)
return err;

mutex_lock(&inode->i_mutex);
ret = write_inode_now(inode, 0);
err = sync_blockdev(inode->i_sb->s_bdev);
if (!ret)
ret = err;
mutex_unlock(&inode->i_mutex);
return ret;
}
2 changes: 1 addition & 1 deletion fs/afs/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
extern int afs_writeback_all(struct afs_vnode *);
extern int afs_fsync(struct file *, int);
extern int afs_fsync(struct file *, loff_t, loff_t, int);


/*****************************************************************************/
Expand Down
18 changes: 14 additions & 4 deletions fs/afs/write.c
Original file line number Diff line number Diff line change
Expand Up @@ -681,9 +681,10 @@ int afs_writeback_all(struct afs_vnode *vnode)
* - the return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
*/
int afs_fsync(struct file *file, int datasync)
int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = file->f_mapping->host;
struct afs_writeback *wb, *xwb;
struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
int ret;
Expand All @@ -692,12 +693,19 @@ int afs_fsync(struct file *file, int datasync)
vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
datasync);

ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
return ret;
mutex_lock(&inode->i_mutex);

/* use a writeback record as a marker in the queue - when this reaches
* the front of the queue, all the outstanding writes are either
* completed or rejected */
wb = kzalloc(sizeof(*wb), GFP_KERNEL);
if (!wb)
return -ENOMEM;
if (!wb) {
ret = -ENOMEM;
goto out;
}
wb->vnode = vnode;
wb->first = 0;
wb->last = -1;
Expand All @@ -720,7 +728,7 @@ int afs_fsync(struct file *file, int datasync)
if (ret < 0) {
afs_put_writeback(wb);
_leave(" = %d [wb]", ret);
return ret;
goto out;
}

/* wait for the preceding writes to actually complete */
Expand All @@ -729,6 +737,8 @@ int afs_fsync(struct file *file, int datasync)
vnode->writebacks.next == &wb->link);
afs_put_writeback(wb);
_leave(" = %d", ret);
out:
mutex_unlock(&inode->i_mutex);
return ret;
}

Expand Down
3 changes: 2 additions & 1 deletion fs/bad_inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ static int bad_file_release(struct inode *inode, struct file *filp)
return -EIO;
}

static int bad_file_fsync(struct file *file, int datasync)
static int bad_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync)
{
return -EIO;
}
Expand Down
6 changes: 1 addition & 5 deletions fs/block_dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
return retval;
}

int blkdev_fsync(struct file *filp, int datasync)
int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
struct inode *bd_inode = filp->f_mapping->host;
struct block_device *bdev = I_BDEV(bd_inode);
Expand All @@ -389,14 +389,10 @@ int blkdev_fsync(struct file *filp, int datasync)
* i_mutex and doing so causes performance issues with concurrent
* O_SYNC writers to a block device.
*/
mutex_unlock(&bd_inode->i_mutex);

error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
if (error == -EOPNOTSUPP)
error = 0;

mutex_lock(&bd_inode->i_mutex);

return error;
}
EXPORT_SYMBOL(blkdev_fsync);
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2605,7 +2605,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
struct inode *inode);
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
int btrfs_sync_file(struct file *file, int datasync);
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int skip_pinned);
extern const struct file_operations btrfs_file_operations;
Expand Down
Loading

0 comments on commit 02c24a8

Please sign in to comment.