Skip to content

Commit

Permalink
Merge branch 'osync_cleanup' of git://git.kernel.org/pub/scm/linux/ke…
Browse files Browse the repository at this point in the history
…rnel/git/jack/linux-fs-2.6

* 'osync_cleanup' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
  fsync: wait for data writeout completion before calling ->fsync
  vfs: Remove generic_osync_inode() and sync_page_range{_nolock}()
  fat: Opencode sync_page_range_nolock()
  pohmelfs: Use new syncing helper
  xfs: Convert sync_page_range() to simple filemap_write_and_wait_range()
  ocfs2: Update syncing after splicing to match generic version
  ntfs: Use new syncing helpers and update comments
  ext4: Remove syncing logic from ext4_file_write
  ext3: Remove syncing logic from ext3_file_write
  ext2: Update comment about generic_osync_inode
  vfs: Introduce new helpers for syncing after writing to O_SYNC file or IS_SYNC inode
  vfs: Rename generic_file_aio_write_nolock
  ocfs2: Use __generic_file_aio_write instead of generic_file_aio_write_nolock
  pohmelfs: Use __generic_file_aio_write instead of generic_file_aio_write_nolock
  vfs: Remove syncing from generic_file_direct_write() and generic_file_buffered_write()
  vfs: Export __generic_file_aio_write() and add some comments
  vfs: Introduce filemap_fdatawait_range
  • Loading branch information
torvalds committed Sep 14, 2009
2 parents 33f1de6 + 2daea67 commit 4142e0d
Show file tree
Hide file tree
Showing 18 changed files with 204 additions and 380 deletions.
2 changes: 1 addition & 1 deletion drivers/char/raw.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ static const struct file_operations raw_fops = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.write = do_sync_write,
.aio_write = generic_file_aio_write_nolock,
.aio_write = blkdev_aio_write,
.open = raw_open,
.release= raw_release,
.ioctl = raw_ioctl,
Expand Down
6 changes: 3 additions & 3 deletions drivers/staging/pohmelfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -921,16 +921,16 @@ ssize_t pohmelfs_write(struct file *file, const char __user *buf,
if (ret)
goto err_out_unlock;

ret = generic_file_aio_write_nolock(&kiocb, &iov, 1, pos);
ret = __generic_file_aio_write(&kiocb, &iov, 1, &kiocb.ki_pos);
*ppos = kiocb.ki_pos;

mutex_unlock(&inode->i_mutex);
WARN_ON(ret < 0);

if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
if (ret > 0) {
ssize_t err;

err = sync_page_range(inode, mapping, pos, ret);
err = generic_write_sync(file, pos, ret);
if (err < 0)
ret = err;
WARN_ON(ret < 0);
Expand Down
29 changes: 28 additions & 1 deletion fs/block_dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1404,6 +1404,33 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return blkdev_ioctl(bdev, mode, cmd, arg);
}

/*
* Write data to the block device. Only intended for the block device itself
* and the raw driver which basically is a fake block device.
*
* Does not take i_mutex for the write and thus is not for general purpose
* use.
*/
ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
ssize_t ret;

BUG_ON(iocb->ki_pos != pos);

ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
if (ret > 0 || ret == -EIOCBQUEUED) {
ssize_t err;

err = generic_write_sync(file, pos, ret);
if (err < 0 && ret > 0)
ret = err;
}
return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);

/*
* Try to release a page associated with block device when the system
* is under memory pressure.
Expand Down Expand Up @@ -1436,7 +1463,7 @@ const struct file_operations def_blk_fops = {
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write_nolock,
.aio_write = blkdev_aio_write,
.mmap = generic_file_mmap,
.fsync = block_fsync,
.unlocked_ioctl = block_ioctl,
Expand Down
2 changes: 1 addition & 1 deletion fs/ext2/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ static int ext2_alloc_branch(struct inode *inode,
unlock_buffer(bh);
mark_buffer_dirty_inode(bh, inode);
/* We used to sync bh here if IS_SYNC(inode).
* But we now rely upon generic_osync_inode()
* But we now rely upon generic_write_sync()
* and b_inode_buffers. But not for directories.
*/
if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
Expand Down
61 changes: 1 addition & 60 deletions fs/ext3/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,71 +51,12 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
return 0;
}

static ssize_t
ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_path.dentry->d_inode;
ssize_t ret;
int err;

ret = generic_file_aio_write(iocb, iov, nr_segs, pos);

/*
* Skip flushing if there was an error, or if nothing was written.
*/
if (ret <= 0)
return ret;

/*
* If the inode is IS_SYNC, or is O_SYNC and we are doing data
* journalling then we need to make sure that we force the transaction
* to disk to keep all metadata uptodate synchronously.
*/
if (file->f_flags & O_SYNC) {
/*
* If we are non-data-journaled, then the dirty data has
* already been flushed to backing store by generic_osync_inode,
* and the inode has been flushed too if there have been any
* modifications other than mere timestamp updates.
*
* Open question --- do we care about flushing timestamps too
* if the inode is IS_SYNC?
*/
if (!ext3_should_journal_data(inode))
return ret;

goto force_commit;
}

/*
* So we know that there has been no forced data flush. If the inode
* is marked IS_SYNC, we need to force one ourselves.
*/
if (!IS_SYNC(inode))
return ret;

/*
* Open question #2 --- should we force data to disk here too? If we
* don't, the only impact is that data=writeback filesystems won't
* flush data to disk automatically on IS_SYNC, only metadata (but
* historically, that is what ext2 has done.)
*/

force_commit:
err = ext3_force_commit(inode->i_sb);
if (err)
return err;
return ret;
}

const struct file_operations ext3_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = ext3_file_write,
.aio_write = generic_file_aio_write,
.unlocked_ioctl = ext3_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ext3_compat_ioctl,
Expand Down
53 changes: 2 additions & 51 deletions fs/ext4/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,7 @@ static ssize_t
ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_path.dentry->d_inode;
ssize_t ret;
int err;
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;

/*
* If we have encountered a bitmap-format file, the size limit
Expand All @@ -81,53 +78,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
}
}

ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
/*
* Skip flushing if there was an error, or if nothing was written.
*/
if (ret <= 0)
return ret;

/*
* If the inode is IS_SYNC, or is O_SYNC and we are doing data
* journalling then we need to make sure that we force the transaction
* to disk to keep all metadata uptodate synchronously.
*/
if (file->f_flags & O_SYNC) {
/*
* If we are non-data-journaled, then the dirty data has
* already been flushed to backing store by generic_osync_inode,
* and the inode has been flushed too if there have been any
* modifications other than mere timestamp updates.
*
* Open question --- do we care about flushing timestamps too
* if the inode is IS_SYNC?
*/
if (!ext4_should_journal_data(inode))
return ret;

goto force_commit;
}

/*
* So we know that there has been no forced data flush. If the inode
* is marked IS_SYNC, we need to force one ourselves.
*/
if (!IS_SYNC(inode))
return ret;

/*
* Open question #2 --- should we force data to disk here too? If we
* don't, the only impact is that data=writeback filesystems won't
* flush data to disk automatically on IS_SYNC, only metadata (but
* historically, that is what ext2 has done.)
*/

force_commit:
err = ext4_force_commit(inode->i_sb);
if (err)
return err;
return ret;
return generic_file_aio_write(iocb, iov, nr_segs, pos);
}

static struct vm_operations_struct ext4_file_vm_ops = {
Expand Down
22 changes: 20 additions & 2 deletions fs/fat/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,26 @@ static int fat_cont_expand(struct inode *inode, loff_t size)

inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
if (IS_SYNC(inode))
err = sync_page_range_nolock(inode, mapping, start, count);
if (IS_SYNC(inode)) {
int err2;

/*
* Opencode syncing since we don't have a file open to use
* standard fsync path.
*/
err = filemap_fdatawrite_range(mapping, start,
start + count - 1);
err2 = sync_mapping_buffers(mapping);
if (!err)
err = err2;
err2 = write_inode_now(inode, 1);
if (!err)
err = err2;
if (!err) {
err = filemap_fdatawait_range(mapping, start,
start + count - 1);
}
}
out:
return err;
}
Expand Down
4 changes: 2 additions & 2 deletions fs/fat/misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
MSDOS_I(inode)->i_start = new_dclus;
MSDOS_I(inode)->i_logstart = new_dclus;
/*
* Since generic_osync_inode() synchronize later if
* this is not directory, we don't here.
* Since generic_write_sync() synchronizes regular files later,
* we sync here only directories.
*/
if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) {
ret = fat_sync_inode(inode);
Expand Down
54 changes: 0 additions & 54 deletions fs/fs-writeback.c
Original file line number Diff line number Diff line change
Expand Up @@ -1242,57 +1242,3 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
return ret;
}
EXPORT_SYMBOL(sync_inode);

/**
* generic_osync_inode - flush all dirty data for a given inode to disk
* @inode: inode to write
* @mapping: the address_space that should be flushed
* @what: what to write and wait upon
*
* This can be called by file_write functions for files which have the
* O_SYNC flag set, to flush dirty writes to disk.
*
* @what is a bitmask, specifying which part of the inode's data should be
* written and waited upon.
*
* OSYNC_DATA: i_mapping's dirty data
* OSYNC_METADATA: the buffers at i_mapping->private_list
* OSYNC_INODE: the inode itself
*/

int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what)
{
int err = 0;
int need_write_inode_now = 0;
int err2;

if (what & OSYNC_DATA)
err = filemap_fdatawrite(mapping);
if (what & (OSYNC_METADATA|OSYNC_DATA)) {
err2 = sync_mapping_buffers(mapping);
if (!err)
err = err2;
}
if (what & OSYNC_DATA) {
err2 = filemap_fdatawait(mapping);
if (!err)
err = err2;
}

spin_lock(&inode_lock);
if ((inode->i_state & I_DIRTY) &&
((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC)))
need_write_inode_now = 1;
spin_unlock(&inode_lock);

if (need_write_inode_now) {
err2 = write_inode_now(inode, 1);
if (!err)
err = err2;
}
else
inode_sync_wait(inode);

return err;
}
EXPORT_SYMBOL(generic_osync_inode);
16 changes: 4 additions & 12 deletions fs/ntfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -2076,14 +2076,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
*ppos = pos;
if (cached_page)
page_cache_release(cached_page);
/* For now, when the user asks for O_SYNC, we actually give O_DSYNC. */
if (likely(!status)) {
if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(vi))) {
if (!mapping->a_ops->writepage || !is_sync_kiocb(iocb))
status = generic_osync_inode(vi, mapping,
OSYNC_METADATA|OSYNC_DATA);
}
}
pagevec_lru_add_file(&lru_pvec);
ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
written ? "written" : "status", (unsigned long)written,
Expand Down Expand Up @@ -2145,8 +2137,8 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
mutex_lock(&inode->i_mutex);
ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
int err = sync_page_range(inode, mapping, pos, ret);
if (ret > 0) {
int err = generic_write_sync(file, pos, ret);
if (err < 0)
ret = err;
}
Expand All @@ -2173,8 +2165,8 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
if (ret == -EIOCBQUEUED)
ret = wait_on_sync_kiocb(&kiocb);
mutex_unlock(&inode->i_mutex);
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
int err = sync_page_range(inode, mapping, *ppos - ret, ret);
if (ret > 0) {
int err = generic_write_sync(file, *ppos - ret, ret);
if (err < 0)
ret = err;
}
Expand Down
13 changes: 6 additions & 7 deletions fs/ntfs/mft.c
Original file line number Diff line number Diff line change
Expand Up @@ -384,13 +384,12 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
* it is dirty in the inode meta data rather than the data page cache of the
* inode, and thus there are no data pages that need writing out. Therefore, a
* full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the
* other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to
* ensure ->write_inode is called from generic_osync_inode() and this needs to
* happen or the file data would not necessarily hit the device synchronously,
* even though the vfs inode has the O_SYNC flag set. Also, I_DIRTY_DATASYNC
* simply "feels" better than just I_DIRTY_SYNC, since the file data has not
* actually hit the block device yet, which is not what I_DIRTY_SYNC on its own
* would suggest.
* other hand, is not sufficient, because ->write_inode needs to be called even
* in case of fdatasync. This needs to happen or the file data would not
* necessarily hit the device synchronously, even though the vfs inode has the
* O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just
* I_DIRTY_SYNC, since the file data has not actually hit the block device yet,
* which is not what I_DIRTY_SYNC on its own would suggest.
*/
void __mark_mft_record_dirty(ntfs_inode *ni)
{
Expand Down
Loading

0 comments on commit 4142e0d

Please sign in to comment.