Merge tag 'ceph-for-5.2-rc4' of git://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov:
 "A change to call iput() asynchronously to avoid a possible deadlock
  when iput_final() needs to wait for in-flight I/O (e.g. readahead) and
  a fixup for a cleanup that went into -rc1"

* tag 'ceph-for-5.2-rc4' of git://github.com/ceph/ceph-client:
  ceph: fix error handling in ceph_get_caps()
  ceph: avoid iput_final() while holding mutex or in dispatch thread
  ceph: single workqueue for inode related works
torvalds committed Jun 8, 2019
2 parents 8e61f6f + 7b2f936 commit 2759e05
Showing 8 changed files with 156 additions and 135 deletions.
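The central change in this pull is ceph_async_iput(), added in fs/ceph/inode.c below: dropping the last reference to an inode from an MDS/OSD dispatch thread or from the delayed-caps tick can deadlock, because iput_final() may have to wait for in-flight readahead that those same threads must service. The fix drops the reference in-line only when it is not the last one and hands the final put to a workqueue otherwise. What follows is a minimal user-space sketch of that "defer only the final put" pattern, not kernel code: struct object, object_put_async() and release_worker() are invented names, and a detached pthread stands in for the kernel workqueue.

/*
 * Minimal user-space sketch of the "async final put" idea, not kernel
 * code.  The fast path decrements the reference count in place; only
 * the final, potentially blocking release is deferred to another
 * thread, mirroring what ceph_async_iput() does with inode_wq.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct object {
	atomic_int refcount;
	char name[32];
};

/* Blocking teardown, standing in for iput_final() waiting on readahead. */
static void object_release(struct object *obj)
{
	printf("releasing %s (may block)\n", obj->name);
	usleep(10000);			/* pretend to wait for in-flight I/O */
	free(obj);
}

static void *release_worker(void *arg)
{
	object_release(arg);
	return NULL;
}

/*
 * Drop one reference.  If other references remain, just decrement and
 * return; if this is the last reference, defer the blocking release to
 * a worker thread instead of doing it in the caller's context.
 */
static void object_put_async(struct object *obj)
{
	int old = atomic_load(&obj->refcount);

	for (;;) {
		if (old > 1) {
			/* not the last reference: plain decrement */
			if (atomic_compare_exchange_weak(&obj->refcount,
							 &old, old - 1))
				return;
			continue;	/* lost a race, 'old' was refreshed */
		}
		/* last reference: hand off the blocking part */
		pthread_t tid;
		if (pthread_create(&tid, NULL, release_worker, obj) == 0) {
			pthread_detach(tid);
			return;
		}
		object_release(obj);	/* could not defer, release in-line */
		return;
	}
}

int main(void)
{
	struct object *obj = calloc(1, sizeof(*obj));

	atomic_init(&obj->refcount, 2);
	snprintf(obj->name, sizeof(obj->name), "inode-1");

	object_put_async(obj);	/* 2 -> 1, returns immediately */
	object_put_async(obj);	/* last ref, release runs in the worker */

	usleep(50000);		/* give the detached worker time to finish */
	return 0;
}

The kernel version below is tighter: it loops on atomic_add_unless(&inode->i_count, -1, 1) and, when it cannot queue the work item because it is already pending, simply retries, since a pending work item implies another reference is still held.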
34 changes: 19 additions & 15 deletions fs/ceph/caps.c
@@ -2738,15 +2738,13 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
_got = 0;
ret = try_get_cap_refs(ci, need, want, endoff,
false, &_got);
if (ret == -EAGAIN) {
if (ret == -EAGAIN)
continue;
} else if (!ret) {
int err;

if (!ret) {
DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(&ci->i_cap_wq, &wait);

while (!(err = try_get_cap_refs(ci, need, want, endoff,
while (!(ret = try_get_cap_refs(ci, need, want, endoff,
true, &_got))) {
if (signal_pending(current)) {
ret = -ERESTARTSYS;
@@ -2756,14 +2754,16 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
}

remove_wait_queue(&ci->i_cap_wq, &wait);
if (err == -EAGAIN)
if (ret == -EAGAIN)
continue;
}
if (ret == -ESTALE) {
/* session was killed, try renew caps */
ret = ceph_renew_caps(&ci->vfs_inode);
if (ret == 0)
continue;
if (ret < 0) {
if (ret == -ESTALE) {
/* session was killed, try renew caps */
ret = ceph_renew_caps(&ci->vfs_inode);
if (ret == 0)
continue;
}
return ret;
}

@@ -2992,8 +2992,10 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
}
if (complete_capsnap)
wake_up_all(&ci->i_cap_wq);
while (put-- > 0)
iput(inode);
while (put-- > 0) {
/* avoid calling iput_final() in osd dispatch threads */
ceph_async_iput(inode);
}
}

/*
@@ -3964,8 +3966,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
done:
mutex_unlock(&session->s_mutex);
done_unlocked:
iput(inode);
ceph_put_string(extra_info.pool_ns);
/* avoid calling iput_final() in mds dispatch threads */
ceph_async_iput(inode);
return;

flush_cap_releases:
@@ -4011,7 +4014,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
if (inode) {
dout("check_delayed_caps on %p\n", inode);
ceph_check_caps(ci, flags, NULL);
iput(inode);
/* avoid calling iput_final() in tick thread */
ceph_async_iput(inode);
}
}
spin_unlock(&mdsc->cap_delay_lock);
2 changes: 1 addition & 1 deletion fs/ceph/file.c
@@ -791,7 +791,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
if (aio_work) {
INIT_WORK(&aio_work->work, ceph_aio_retry_work);
aio_work->req = req;
queue_work(ceph_inode_to_client(inode)->wb_wq,
queue_work(ceph_inode_to_client(inode)->inode_wq,
&aio_work->work);
return;
}
155 changes: 83 additions & 72 deletions fs/ceph/inode.c
@@ -33,9 +33,7 @@

static const struct inode_operations ceph_symlink_iops;

static void ceph_invalidate_work(struct work_struct *work);
static void ceph_writeback_work(struct work_struct *work);
static void ceph_vmtruncate_work(struct work_struct *work);
static void ceph_inode_work(struct work_struct *work);

/*
* find or create an inode, given the ceph ino number
@@ -509,10 +507,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
INIT_LIST_HEAD(&ci->i_snap_realm_item);
INIT_LIST_HEAD(&ci->i_snap_flush_item);

INIT_WORK(&ci->i_wb_work, ceph_writeback_work);
INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work);

INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
INIT_WORK(&ci->i_work, ceph_inode_work);
ci->i_work_mask = 0;

ceph_fscache_inode_init(ci);

@@ -1480,7 +1476,8 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
pr_err("fill_inode badness on %p got %d\n", in, rc);
err = rc;
}
iput(in);
/* avoid calling iput_final() in mds dispatch threads */
ceph_async_iput(in);
}

return err;
@@ -1678,8 +1675,11 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
&req->r_caps_reservation);
if (ret < 0) {
pr_err("fill_inode badness on %p\n", in);
if (d_really_is_negative(dn))
iput(in);
if (d_really_is_negative(dn)) {
/* avoid calling iput_final() in mds
* dispatch threads */
ceph_async_iput(in);
}
d_drop(dn);
err = ret;
goto next_item;
@@ -1689,7 +1689,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
if (ceph_security_xattr_deadlock(in)) {
dout(" skip splicing dn %p to inode %p"
" (security xattr deadlock)\n", dn, in);
iput(in);
ceph_async_iput(in);
skipped++;
goto next_item;
}
@@ -1740,57 +1740,87 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size)
return ret;
}

/*
* Put reference to inode, but avoid calling iput_final() in current thread.
* iput_final() may wait for readahead pages. The wait can cause deadlock in
* some contexts.
*/
void ceph_async_iput(struct inode *inode)
{
if (!inode)
return;
for (;;) {
if (atomic_add_unless(&inode->i_count, -1, 1))
break;
if (queue_work(ceph_inode_to_client(inode)->inode_wq,
&ceph_inode(inode)->i_work))
break;
/* queue work failed, i_count must be at least 2 */
}
}

/*
* Write back inode data in a worker thread. (This can't be done
* in the message handler context.)
*/
void ceph_queue_writeback(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
set_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask);

ihold(inode);
if (queue_work(ceph_inode_to_client(inode)->wb_wq,
&ceph_inode(inode)->i_wb_work)) {
if (queue_work(ceph_inode_to_client(inode)->inode_wq,
&ci->i_work)) {
dout("ceph_queue_writeback %p\n", inode);
} else {
dout("ceph_queue_writeback %p failed\n", inode);
dout("ceph_queue_writeback %p already queued, mask=%lx\n",
inode, ci->i_work_mask);
iput(inode);
}
}

static void ceph_writeback_work(struct work_struct *work)
{
struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
i_wb_work);
struct inode *inode = &ci->vfs_inode;

dout("writeback %p\n", inode);
filemap_fdatawrite(&inode->i_data);
iput(inode);
}

/*
* queue an async invalidation
*/
void ceph_queue_invalidate(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
set_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask);

ihold(inode);
if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
&ceph_inode(inode)->i_pg_inv_work)) {
if (queue_work(ceph_inode_to_client(inode)->inode_wq,
&ceph_inode(inode)->i_work)) {
dout("ceph_queue_invalidate %p\n", inode);
} else {
dout("ceph_queue_invalidate %p failed\n", inode);
dout("ceph_queue_invalidate %p already queued, mask=%lx\n",
inode, ci->i_work_mask);
iput(inode);
}
}

/*
* Invalidate inode pages in a worker thread. (This can't be done
* in the message handler context.)
* Queue an async vmtruncate. If we fail to queue work, we will handle
* the truncation the next time we call __ceph_do_pending_vmtruncate.
*/
static void ceph_invalidate_work(struct work_struct *work)
void ceph_queue_vmtruncate(struct inode *inode)
{
struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
i_pg_inv_work);
struct inode *inode = &ci->vfs_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
set_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask);

ihold(inode);
if (queue_work(ceph_inode_to_client(inode)->inode_wq,
&ci->i_work)) {
dout("ceph_queue_vmtruncate %p\n", inode);
} else {
dout("ceph_queue_vmtruncate %p already queued, mask=%lx\n",
inode, ci->i_work_mask);
iput(inode);
}
}

static void ceph_do_invalidate_pages(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
u32 orig_gen;
int check = 0;
@@ -1842,44 +1872,6 @@ static void ceph_invalidate_work(struct work_struct *work)
out:
if (check)
ceph_check_caps(ci, 0, NULL);
iput(inode);
}


/*
* called by trunc_wq;
*
* We also truncate in a separate thread as well.
*/
static void ceph_vmtruncate_work(struct work_struct *work)
{
struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
i_vmtruncate_work);
struct inode *inode = &ci->vfs_inode;

dout("vmtruncate_work %p\n", inode);
__ceph_do_pending_vmtruncate(inode);
iput(inode);
}

/*
* Queue an async vmtruncate. If we fail to queue work, we will handle
* the truncation the next time we call __ceph_do_pending_vmtruncate.
*/
void ceph_queue_vmtruncate(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);

ihold(inode);

if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
&ci->i_vmtruncate_work)) {
dout("ceph_queue_vmtruncate %p\n", inode);
} else {
dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
inode, ci->i_truncate_pending);
iput(inode);
}
}

/*
@@ -1943,6 +1935,25 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
wake_up_all(&ci->i_cap_wq);
}

static void ceph_inode_work(struct work_struct *work)
{
struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
i_work);
struct inode *inode = &ci->vfs_inode;

if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) {
dout("writeback %p\n", inode);
filemap_fdatawrite(&inode->i_data);
}
if (test_and_clear_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask))
ceph_do_invalidate_pages(inode);

if (test_and_clear_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask))
__ceph_do_pending_vmtruncate(inode);

iput(inode);
}

/*
* symlinks
*/
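The same fs/ceph/inode.c change also folds the three separate per-inode work items (writeback, page invalidation, vmtruncate) into a single i_work item on one inode_wq, with pending jobs recorded as bits in i_work_mask and dispatched by ceph_inode_work(). A rough user-space sketch of that bit-mask dispatch follows; the names (struct fake_inode, queue_inode_work(), inode_work()) are invented, and C11 atomics stand in for the kernel's set_bit()/test_and_clear_bit().

/*
 * Sketch of the "single work item + pending-job bit mask" consolidation,
 * with invented names.  Producers set a bit and (re)queue one work item;
 * the single handler runs every job whose bit is set.
 */
#include <stdatomic.h>
#include <stdio.h>

enum {
	WORK_WRITEBACK  = 1u << 0,
	WORK_INVALIDATE = 1u << 1,
	WORK_VMTRUNCATE = 1u << 2,
};

struct fake_inode {
	atomic_uint work_mask;		/* pending jobs for the one work item */
};

/* Producer side: record the job; the kernel then queues ci->i_work. */
static void queue_inode_work(struct fake_inode *ino, unsigned int bit)
{
	atomic_fetch_or(&ino->work_mask, bit);
	/* here the kernel calls queue_work(fsc->inode_wq, &ci->i_work) */
}

/* Worker side: one handler dispatches on the recorded bits. */
static void inode_work(struct fake_inode *ino)
{
	unsigned int mask = atomic_exchange(&ino->work_mask, 0);

	if (mask & WORK_WRITEBACK)
		printf("writeback\n");
	if (mask & WORK_INVALIDATE)
		printf("invalidate pages\n");
	if (mask & WORK_VMTRUNCATE)
		printf("pending vmtruncate\n");
}

int main(void)
{
	struct fake_inode ino;

	atomic_init(&ino.work_mask, 0);
	queue_inode_work(&ino, WORK_WRITEBACK);
	queue_inode_work(&ino, WORK_VMTRUNCATE);
	inode_work(&ino);	/* runs both queued jobs in one pass */
	return 0;
}

One difference from the kernel code: ceph_inode_work() clears each bit individually with test_and_clear_bit(), whereas this sketch drains the whole mask at once with atomic_exchange().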