Skip to content

Commit

Permalink
Merge tag 'ceph-for-4.21-rc1' of git://github.com/ceph/ceph-client
Browse files Browse the repository at this point in the history
Pull ceph updates from Ilya Dryomov:
 "A fairly quiet round: a couple of messenger performance improvements
  from myself and a few cap handling fixes from Zheng"

* tag 'ceph-for-4.21-rc1' of git://github.com/ceph/ceph-client:
  ceph: don't encode inode paths into reconnect message
  ceph: update wanted caps after resuming stale session
  ceph: skip updating 'wanted' caps if caps are already issued
  ceph: don't request excl caps when mount is readonly
  ceph: don't update importing cap's mseq when handling cap export
  libceph: switch more to bool in ceph_tcp_sendmsg()
  libceph: use MSG_SENDPAGE_NOTLAST with ceph_tcp_sendpage()
  libceph: use sock_no_sendpage() as a fallback in ceph_tcp_sendpage()
  libceph: drop last_piece logic from write_partial_message_data()
  ceph: remove redundant assignment
  ceph: cleanup splice_dentry()
  • Loading branch information
torvalds committed Jan 5, 2019
2 parents 35004f2 + 5ccedf1 commit c7eaf34
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 162 deletions.
75 changes: 45 additions & 30 deletions fs/ceph/caps.c
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,9 @@ void ceph_add_cap(struct inode *inode,
session->s_nr_caps++;
spin_unlock(&session->s_cap_lock);
} else {
if (cap->cap_gen < session->s_cap_gen)
cap->issued = cap->implemented = CEPH_CAP_PIN;

/*
* auth mds of the inode changed. we received the cap export
* message, but still haven't received the cap import message.
Expand Down Expand Up @@ -1855,14 +1858,17 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
retain |= CEPH_CAP_ANY; /* be greedy */
} else if (S_ISDIR(inode->i_mode) &&
(issued & CEPH_CAP_FILE_SHARED) &&
__ceph_dir_is_complete(ci)) {
__ceph_dir_is_complete(ci)) {
/*
* If a directory is complete, we want to keep
* the exclusive cap. So that MDS does not end up
* revoking the shared cap on every create/unlink
* operation.
*/
want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
if (IS_RDONLY(inode))
want = CEPH_CAP_ANY_SHARED;
else
want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
retain |= want;
} else {

Expand Down Expand Up @@ -1970,8 +1976,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
goto ack;

/* things we might delay */
if ((cap->issued & ~retain) == 0 &&
cap->mds_wanted == want)
if ((cap->issued & ~retain) == 0)
continue; /* nope, all good */

if (no_delay)
Expand Down Expand Up @@ -3048,7 +3053,8 @@ static void handle_cap_grant(struct inode *inode,
int used, wanted, dirty;
u64 size = le64_to_cpu(grant->size);
u64 max_size = le64_to_cpu(grant->max_size);
int check_caps = 0;
unsigned char check_caps = 0;
bool was_stale = cap->cap_gen < session->s_cap_gen;
bool wake = false;
bool writeback = false;
bool queue_trunc = false;
Expand All @@ -3062,21 +3068,6 @@ static void handle_cap_grant(struct inode *inode,
inode->i_size);


/*
* auth mds of the inode changed. we received the cap export message,
* but still haven't received the cap import message. handle_cap_export
* updated the new auth MDS' cap.
*
* "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
* that was sent before the cap import message. So don't remove caps.
*/
if (ceph_seq_cmp(seq, cap->seq) <= 0) {
WARN_ON(cap != ci->i_auth_cap);
WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
seq = cap->seq;
newcaps |= cap->issued;
}

/*
* If CACHE is being revoked, and we have no dirty buffers,
* try to invalidate (once). (If there are dirty buffers, we
Expand All @@ -3096,6 +3087,24 @@ static void handle_cap_grant(struct inode *inode,
}
}

if (was_stale)
cap->issued = cap->implemented = CEPH_CAP_PIN;

/*
* auth mds of the inode changed. we received the cap export message,
* but still haven't received the cap import message. handle_cap_export
* updated the new auth MDS' cap.
*
* "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
* that was sent before the cap import message. So don't remove caps.
*/
if (ceph_seq_cmp(seq, cap->seq) <= 0) {
WARN_ON(cap != ci->i_auth_cap);
WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
seq = cap->seq;
newcaps |= cap->issued;
}

/* side effects now are allowed */
cap->cap_gen = session->s_cap_gen;
cap->seq = seq;
Expand Down Expand Up @@ -3200,13 +3209,20 @@ static void handle_cap_grant(struct inode *inode,
ceph_cap_string(wanted),
ceph_cap_string(used),
ceph_cap_string(dirty));
if (wanted != le32_to_cpu(grant->wanted)) {
dout("mds wanted %s -> %s\n",
ceph_cap_string(le32_to_cpu(grant->wanted)),
ceph_cap_string(wanted));
/* imported cap may not have correct mds_wanted */
if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT)
check_caps = 1;

if ((was_stale || le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) &&
(wanted & ~(cap->mds_wanted | newcaps))) {
/*
* If the mds is importing the cap, prior cap messages that update
* 'wanted' may get dropped by the mds (migrate seq mismatch).
*
* We don't send a cap message to update 'wanted' if what we
* want is already issued. If the mds revokes caps, the cap message
* that releases them also tells the mds what we want. But if the
* caps were forcibly revoked by the mds (session stale), we may
* not have told the mds what we want.
*/
check_caps = 1;
}

/* revocation, grant, or no-op? */
Expand Down Expand Up @@ -3539,9 +3555,9 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
goto out_unlock;

if (target < 0) {
__ceph_remove_cap(cap, false);
if (!ci->i_auth_cap)
if (cap->mds_wanted | cap->issued)
ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
__ceph_remove_cap(cap, false);
goto out_unlock;
}

Expand Down Expand Up @@ -3569,7 +3585,6 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
tcap->cap_id = t_cap_id;
tcap->seq = t_seq - 1;
tcap->issue_seq = t_seq - 1;
tcap->mseq = t_mseq;
tcap->issued |= issued;
tcap->implemented |= issued;
if (cap == ci->i_auth_cap)
Expand Down
60 changes: 24 additions & 36 deletions fs/ceph/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1098,8 +1098,9 @@ static void update_dentry_lease(struct dentry *dentry,
* splice a dentry to an inode.
* caller must hold directory i_mutex for this to be safe.
*/
static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
static int splice_dentry(struct dentry **pdn, struct inode *in)
{
struct dentry *dn = *pdn;
struct dentry *realdn;

BUG_ON(d_inode(dn));
Expand Down Expand Up @@ -1132,28 +1133,23 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
if (IS_ERR(realdn)) {
pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
PTR_ERR(realdn), dn, in, ceph_vinop(in));
dn = realdn;
/*
* Caller should release 'dn' in the case of error.
* If 'req->r_dentry' is passed to this function,
* caller should leave 'req->r_dentry' untouched.
*/
goto out;
} else if (realdn) {
return PTR_ERR(realdn);
}

if (realdn) {
dout("dn %p (%d) spliced with %p (%d) "
"inode %p ino %llx.%llx\n",
dn, d_count(dn),
realdn, d_count(realdn),
d_inode(realdn), ceph_vinop(d_inode(realdn)));
dput(dn);
dn = realdn;
*pdn = realdn;
} else {
BUG_ON(!ceph_dentry(dn));
dout("dn %p attached to %p ino %llx.%llx\n",
dn, d_inode(dn), ceph_vinop(d_inode(dn)));
}
out:
return dn;
return 0;
}

/*
Expand Down Expand Up @@ -1340,7 +1336,12 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
dout("dn %p gets new offset %lld\n", req->r_old_dentry,
ceph_dentry(req->r_old_dentry)->offset);

dn = req->r_old_dentry; /* use old_dentry */
/* swap r_dentry and r_old_dentry in case that
* splice_dentry() gets called later. This is safe
* because no other place will use them */
req->r_dentry = req->r_old_dentry;
req->r_old_dentry = dn;
dn = req->r_dentry;
}

/* null dentry? */
Expand All @@ -1365,12 +1366,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
if (d_really_is_negative(dn)) {
ceph_dir_clear_ordered(dir);
ihold(in);
dn = splice_dentry(dn, in);
if (IS_ERR(dn)) {
err = PTR_ERR(dn);
err = splice_dentry(&req->r_dentry, in);
if (err < 0)
goto done;
}
req->r_dentry = dn; /* may have spliced */
dn = req->r_dentry; /* may have spliced */
} else if (d_really_is_positive(dn) && d_inode(dn) != in) {
dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
dn, d_inode(dn), ceph_vinop(d_inode(dn)),
Expand All @@ -1390,22 +1389,18 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
} else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
req->r_op == CEPH_MDS_OP_MKSNAP) &&
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
struct dentry *dn = req->r_dentry;
struct inode *dir = req->r_parent;

/* fill out a snapdir LOOKUPSNAP dentry */
BUG_ON(!dn);
BUG_ON(!dir);
BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
dout(" linking snapped dir %p to dn %p\n", in, dn);
BUG_ON(!req->r_dentry);
dout(" linking snapped dir %p to dn %p\n", in, req->r_dentry);
ceph_dir_clear_ordered(dir);
ihold(in);
dn = splice_dentry(dn, in);
if (IS_ERR(dn)) {
err = PTR_ERR(dn);
err = splice_dentry(&req->r_dentry, in);
if (err < 0)
goto done;
}
req->r_dentry = dn; /* may have spliced */
} else if (rinfo->head->is_dentry) {
struct ceph_vino *ptvino = NULL;

Expand Down Expand Up @@ -1669,8 +1664,6 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
}

if (d_really_is_negative(dn)) {
struct dentry *realdn;

if (ceph_security_xattr_deadlock(in)) {
dout(" skip splicing dn %p to inode %p"
" (security xattr deadlock)\n", dn, in);
Expand All @@ -1679,13 +1672,9 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
goto next_item;
}

realdn = splice_dentry(dn, in);
if (IS_ERR(realdn)) {
err = PTR_ERR(realdn);
d_drop(dn);
err = splice_dentry(&dn, in);
if (err < 0)
goto next_item;
}
dn = realdn;
}

ceph_dentry(dn)->offset = rde->offset;
Expand All @@ -1701,8 +1690,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
err = ret;
}
next_item:
if (dn)
dput(dn);
dput(dn);
}
out:
if (err == 0 && skipped == 0) {
Expand Down
Loading

0 comments on commit c7eaf34

Please sign in to comment.