Skip to content

Commit

Permalink
ocfs2: improve fsync efficiency and fix deadlock between aio_write an…
Browse files Browse the repository at this point in the history
…d sync_file

Currently, ocfs2_sync_file grabs i_mutex and forces the current journal
transaction to complete.  This isn't terribly efficient, since sync_file
really only needs to wait for the last transaction involving that inode
to complete, and this doesn't require i_mutex.

Therefore, implement the necessary bits to track the newest tid
associated with an inode, and teach sync_file to wait for that instead
of waiting for everything in the journal to commit.  Furthermore, only
issue the flush request to the drive if jbd2 hasn't already done so.

This also eliminates the deadlock between ocfs2_file_aio_write() and
ocfs2_sync_file().  aio_write takes i_mutex then calls
ocfs2_aiodio_wait() to wait for unaligned dio writes to finish.
However, if that dio completion involves calling fsync, then we can get
into trouble when some ocfs2_sync_file tries to take i_mutex.

Signed-off-by: Darrick J. Wong <[email protected]>
Reviewed-by: Mark Fasheh <[email protected]>
Cc: Joel Becker <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
djwong authored and torvalds committed Apr 3, 2014
1 parent a75fe48 commit 2931cdc
Show file tree
Hide file tree
Showing 9 changed files with 74 additions and 21 deletions.
1 change: 1 addition & 0 deletions fs/ocfs2/alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -6932,6 +6932,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
spin_unlock(&oi->ip_lock);

ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_dinode_new_extent_list(inode, di);

ocfs2_journal_dirty(handle, di_bh);
Expand Down
1 change: 1 addition & 0 deletions fs/ocfs2/aops.c
Original file line number Diff line number Diff line change
Expand Up @@ -2039,6 +2039,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, wc->w_di_bh);

ocfs2_commit_trans(osb, handle);
Expand Down
4 changes: 4 additions & 0 deletions fs/ocfs2/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -2957,6 +2957,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
ocfs2_init_dir_trailer(dir, dirdata_bh, i);
}

ocfs2_update_inode_fsync_trans(handle, dir, 1);
ocfs2_journal_dirty(handle, dirdata_bh);

if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
Expand Down Expand Up @@ -3338,6 +3339,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
} else {
de->rec_len = cpu_to_le16(sb->s_blocksize);
}
ocfs2_update_inode_fsync_trans(handle, dir, 1);
ocfs2_journal_dirty(handle, new_bh);

dir_i_size += dir->i_sb->s_blocksize;
Expand Down Expand Up @@ -3896,6 +3898,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
dquot_free_space_nodirty(dir,
ocfs2_clusters_to_bytes(dir->i_sb, 1));

ocfs2_update_inode_fsync_trans(handle, dir, 1);
ocfs2_commit_trans(osb, handle);

out:
Expand Down Expand Up @@ -4134,6 +4137,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
mlog_errno(ret);
did_quota = 0;

ocfs2_update_inode_fsync_trans(handle, dir, 1);
ocfs2_journal_dirty(handle, dx_root_bh);

out_commit:
Expand Down
36 changes: 15 additions & 21 deletions fs/ocfs2/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,13 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
int datasync)
{
int err = 0;
journal_t *journal;
struct inode *inode = file->f_mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_inode_info *oi = OCFS2_I(inode);
journal_t *journal = osb->journal->j_journal;
int ret;
tid_t commit_tid;
bool needs_barrier = false;

trace_ocfs2_sync_file(inode, file, file->f_path.dentry,
OCFS2_I(inode)->ip_blkno,
Expand All @@ -192,29 +196,19 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
if (err)
return err;

/*
* Probably don't need the i_mutex at all in here, just putting it here
* to be consistent with how fsync used to be called, someone more
* familiar with the fs could possibly remove it.
*/
mutex_lock(&inode->i_mutex);
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
/*
* We still have to flush drive's caches to get data to the
* platter
*/
if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
goto bail;
commit_tid = datasync ? oi->i_datasync_tid : oi->i_sync_tid;
if (journal->j_flags & JBD2_BARRIER &&
!jbd2_trans_will_send_data_barrier(journal, commit_tid))
needs_barrier = true;
err = jbd2_complete_transaction(journal, commit_tid);
if (needs_barrier) {
ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
if (!err)
err = ret;
}

journal = osb->journal->j_journal;
err = jbd2_journal_force_commit(journal);

bail:
if (err)
mlog_errno(err);
mutex_unlock(&inode->i_mutex);

return (err < 0) ? -EIO : 0;
}
Expand Down Expand Up @@ -650,7 +644,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
mlog_errno(status);
goto leave;
}

ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, bh);

spin_lock(&OCFS2_I(inode)->ip_lock);
Expand Down
28 changes: 28 additions & 0 deletions fs/ocfs2/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
struct inode *inode = NULL;
struct super_block *sb = osb->sb;
struct ocfs2_find_inode_args args;
journal_t *journal = OCFS2_SB(sb)->journal->j_journal;

trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
sysfile_type);
Expand Down Expand Up @@ -169,6 +170,32 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
goto bail;
}

/*
* Set transaction id's of transactions that have to be committed
* to finish f[data]sync. We set them to currently running transaction
* as we cannot be sure that the inode or some of its metadata isn't
* part of the transaction - the inode could have been reclaimed and
* now it is reread from disk.
*/
if (journal) {
transaction_t *transaction;
tid_t tid;
struct ocfs2_inode_info *oi = OCFS2_I(inode);

read_lock(&journal->j_state_lock);
if (journal->j_running_transaction)
transaction = journal->j_running_transaction;
else
transaction = journal->j_committing_transaction;
if (transaction)
tid = transaction->t_tid;
else
tid = journal->j_commit_sequence;
read_unlock(&journal->j_state_lock);
oi->i_sync_tid = tid;
oi->i_datasync_tid = tid;
}

bail:
if (!IS_ERR(inode)) {
trace_ocfs2_iget_end(inode,
Expand Down Expand Up @@ -1260,6 +1287,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);

ocfs2_journal_dirty(handle, bh);
ocfs2_update_inode_fsync_trans(handle, inode, 1);
leave:
return status;
}
Expand Down
7 changes: 7 additions & 0 deletions fs/ocfs2/inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ struct ocfs2_inode_info
u32 ip_dir_lock_gen;

struct ocfs2_alloc_reservation ip_la_data_resv;

/*
* Transactions that contain inode's metadata needed to complete
* fsync and fdatasync, respectively.
*/
tid_t i_sync_tid;
tid_t i_datasync_tid;
};

/*
Expand Down
11 changes: 11 additions & 0 deletions fs/ocfs2/journal.h
Original file line number Diff line number Diff line change
Expand Up @@ -626,4 +626,15 @@ static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
new_size);
}

static inline void ocfs2_update_inode_fsync_trans(handle_t *handle,
struct inode *inode,
int datasync)
{
struct ocfs2_inode_info *oi = OCFS2_I(inode);

oi->i_sync_tid = handle->h_transaction->t_tid;
if (datasync)
oi->i_datasync_tid = handle->h_transaction->t_tid;
}

#endif /* OCFS2_JOURNAL_H */
4 changes: 4 additions & 0 deletions fs/ocfs2/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
struct ocfs2_dinode *fe = NULL;
struct ocfs2_extent_list *fel;
u16 feat;
struct ocfs2_inode_info *oi = OCFS2_I(inode);

*new_fe_bh = NULL;

Expand Down Expand Up @@ -576,6 +577,9 @@ static int __ocfs2_mknod_locked(struct inode *dir,
mlog_errno(status);
}

oi->i_sync_tid = handle->h_transaction->t_tid;
oi->i_datasync_tid = handle->h_transaction->t_tid;

status = 0; /* error in ocfs2_create_new_inode_locks is not
* critical */

Expand Down
3 changes: 3 additions & 0 deletions fs/ocfs2/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,9 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
if (!oi)
return NULL;

oi->i_sync_tid = 0;
oi->i_datasync_tid = 0;

jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode);
return &oi->vfs_inode;
}
Expand Down

0 comments on commit 2931cdc

Please sign in to comment.