Skip to content

Commit

Permalink
ocfs2: fix race between dio and recover orphan
Browse files Browse the repository at this point in the history
During direct io the inode is first added to the orphan directory and then
deleted from it.  There is a race window in which the orphan entry can be
deleted twice, which triggers the BUG when validating
OCFS2_DIO_ORPHANED_FL in ocfs2_del_inode_from_orphan.

ocfs2_direct_IO_write
    ...
    ocfs2_add_inode_to_orphan
    >>>>>>>> race window.
             1) another node may rm the file and then go down; this node
             then takes care of orphan recovery and clears the flag
             OCFS2_DIO_ORPHANED_FL.
             2) since the rw lock is unlocked, orphan recovery may race
             with another append dio.
    ocfs2_del_inode_from_orphan

So take the inode mutex when recovering orphans, and defer the rw unlock to
the end of the aio write in the append dio case.

Signed-off-by: Joseph Qi <[email protected]>
Reported-by: Yiwen Jiang <[email protected]>
Cc: Weiwei Wang <[email protected]>
Cc: Mark Fasheh <[email protected]>
Cc: Joel Becker <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
josephhz authored and torvalds committed Sep 4, 2015
1 parent 81cf09e commit 512f62a
Show file tree
Hide file tree
Showing 6 changed files with 24 additions and 41 deletions.
9 changes: 6 additions & 3 deletions fs/ocfs2/aops.c
Original file line number Diff line number Diff line change
Expand Up @@ -627,10 +627,13 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
}

ocfs2_iocb_clear_rw_locked(iocb);
/* Let rw unlock to be done later to protect append direct io write */
if (offset + bytes <= i_size_read(inode)) {
ocfs2_iocb_clear_rw_locked(iocb);

level = ocfs2_iocb_rw_locked_level(iocb);
ocfs2_rw_unlock(inode, level);
level = ocfs2_iocb_rw_locked_level(iocb);
ocfs2_rw_unlock(inode, level);
}
}

static int ocfs2_releasepage(struct page *page, gfp_t wait)
Expand Down
2 changes: 1 addition & 1 deletion fs/ocfs2/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -2416,7 +2416,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
}

no_sync:
if (unaligned_dio) {
if (unaligned_dio && ocfs2_iocb_is_unaligned_aio(iocb)) {
ocfs2_iocb_clear_unaligned_aio(iocb);
mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
}
Expand Down
2 changes: 0 additions & 2 deletions fs/ocfs2/inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,6 @@ struct ocfs2_inode_info
tid_t i_sync_tid;
tid_t i_datasync_tid;

wait_queue_head_t append_dio_wq;

struct dquot *i_dquot[MAXQUOTAS];
};

Expand Down
8 changes: 4 additions & 4 deletions fs/ocfs2/journal.c
Original file line number Diff line number Diff line change
Expand Up @@ -2170,6 +2170,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
iter = oi->ip_next_orphan;
oi->ip_next_orphan = NULL;

mutex_lock(&inode->i_mutex);
ret = ocfs2_rw_lock(inode, 1);
if (ret < 0) {
mlog_errno(ret);
Expand Down Expand Up @@ -2206,17 +2207,16 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, 0, 0);
if (ret)
mlog_errno(ret);

wake_up(&OCFS2_I(inode)->append_dio_wq);
} /* else if ORPHAN_NO_NEED_TRUNCATE, do nothing */
unlock_inode:
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
di_bh = NULL;
unlock_rw:
ocfs2_rw_unlock(inode, 1);
next:
mutex_unlock(&inode->i_mutex);
iput(inode);
brelse(di_bh);
di_bh = NULL;
inode = iter;
}

Expand Down
42 changes: 13 additions & 29 deletions fs/ocfs2/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -2601,27 +2601,6 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
return status;
}

/*
 * Check whether the on-disk inode no longer carries OCFS2_DIO_ORPHANED_FL.
 *
 * Takes the inode lock, reads i_flags from the dinode buffer, then drops
 * the lock and releases the buffer.
 *
 * Returns non-zero when OCFS2_DIO_ORPHANED_FL is clear, and 0 when the
 * flag is still set or when the inode lock could not be taken (the error
 * is logged via mlog_errno and reported as "not recovered").
 *
 * NOTE(review): the third argument (1) to ocfs2_inode_lock/unlock is
 * presumably the exclusive lock level -- confirm against the helper.
 */
static int ocfs2_dio_orphan_recovered(struct inode *inode)
{
int ret;
struct buffer_head *di_bh = NULL;
struct ocfs2_dinode *di = NULL;

ret = ocfs2_inode_lock(inode, &di_bh, 1);
if (ret < 0) {
mlog_errno(ret);
/* Lock failure is folded into the "flag still set" result. */
return 0;
}

di = (struct ocfs2_dinode *) di_bh->b_data;
/* i_flags is compared in little-endian form, hence cpu_to_le32(). */
ret = !(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL));
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);

return ret;
}

#define OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL 10000
int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
struct inode *inode)
{
Expand All @@ -2633,7 +2612,6 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
handle_t *handle = NULL;
struct ocfs2_dinode *di = NULL;

restart:
status = ocfs2_inode_lock(inode, &di_bh, 1);
if (status < 0) {
mlog_errno(status);
Expand All @@ -2643,15 +2621,21 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
di = (struct ocfs2_dinode *) di_bh->b_data;
/*
* Another append dio crashed?
* If so, wait for recovery first.
* If so, manually recover it first.
*/
if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) {
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
wait_event_interruptible_timeout(OCFS2_I(inode)->append_dio_wq,
ocfs2_dio_orphan_recovered(inode),
msecs_to_jiffies(OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL));
goto restart;
status = ocfs2_truncate_file(inode, di_bh, i_size_read(inode));
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
goto bail_unlock_inode;
}

status = ocfs2_del_inode_from_orphan(osb, inode, di_bh, 0, 0);
if (status < 0) {
mlog_errno(status);
goto bail_unlock_inode;
}
}

status = ocfs2_prepare_orphan_dir(osb, &orphan_dir_inode,
Expand Down
2 changes: 0 additions & 2 deletions fs/ocfs2/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -1746,8 +1746,6 @@ static void ocfs2_inode_init_once(void *data)
ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
ocfs2_lock_res_init_once(&oi->ip_open_lockres);

init_waitqueue_head(&oi->append_dio_wq);

ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode),
&ocfs2_inode_caching_ops);

Expand Down

0 comments on commit 512f62a

Please sign in to comment.