Skip to content

Commit

Permalink
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel…
Browse files Browse the repository at this point in the history
…/git/sage/ceph-client

Pull Ceph updates from Sage Weil:
 "This time around we have a collection of CephFS fixes from Zheng
  around MDS failure handling and snapshots, support for a new CRUSH
  straw2 algorithm (to sync up with userspace) and several RBD cleanups
  and fixes from Ilya, an error path leak fix from Taesoo, and then an
  assorted collection of cleanups from others"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (28 commits)
  rbd: rbd_wq comment is obsolete
  libceph: announce support for straw2 buckets
  crush: straw2 bucket type with an efficient 64-bit crush_ln()
  crush: ensuring at most num-rep osds are selected
  crush: drop unnecessary include from mapper.c
  ceph: fix uninline data function
  ceph: rename snapshot support
  ceph: fix null pointer dereference in send_mds_reconnect()
  ceph: hold on to exclusive caps on complete directories
  libceph: simplify our debugfs attr macro
  ceph: show non-default options only
  libceph: expose client options through debugfs
  libceph, ceph: split ceph_show_options()
  rbd: mark block queue as non-rotational
  libceph: don't overwrite specific con error msgs
  ceph: cleanup unsafe requests when reconnecting is denied
  ceph: don't zero i_wrbuffer_ref when reconnecting is denied
  ceph: don't mark dirty caps when there is no auth cap
  ceph: keep i_snap_realm while there are writers
  libceph: osdmap.h: Add missing format newlines
  ...
  • Loading branch information
torvalds committed Apr 22, 2015
2 parents 4f21123 + f77303b commit 1204c46
Show file tree
Hide file tree
Showing 22 changed files with 633 additions and 128 deletions.
26 changes: 20 additions & 6 deletions drivers/block/rbd.c
Original file line number Diff line number Diff line change
Expand Up @@ -3762,8 +3762,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
goto out_tag_set;
}

/* We use the default size, but let's be explicit about it. */
blk_queue_physical_block_size(q, SECTOR_SIZE);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */

/* set io sizes to object size */
segment_size = rbd_obj_bytes(&rbd_dev->header);
Expand Down Expand Up @@ -5301,8 +5301,13 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)

if (mapping) {
ret = rbd_dev_header_watch_sync(rbd_dev);
if (ret)
if (ret) {
if (ret == -ENOENT)
pr_info("image %s/%s does not exist\n",
rbd_dev->spec->pool_name,
rbd_dev->spec->image_name);
goto out_header_name;
}
}

ret = rbd_dev_header_info(rbd_dev);
Expand All @@ -5319,8 +5324,14 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
ret = rbd_spec_fill_snap_id(rbd_dev);
else
ret = rbd_spec_fill_names(rbd_dev);
if (ret)
if (ret) {
if (ret == -ENOENT)
pr_info("snap %s/%s@%s does not exist\n",
rbd_dev->spec->pool_name,
rbd_dev->spec->image_name,
rbd_dev->spec->snap_name);
goto err_out_probe;
}

if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
ret = rbd_dev_v2_parent_info(rbd_dev);
Expand Down Expand Up @@ -5390,8 +5401,11 @@ static ssize_t do_rbd_add(struct bus_type *bus,

/* pick the pool */
rc = rbd_add_get_pool_id(rbdc, spec->pool_name);
if (rc < 0)
if (rc < 0) {
if (rc == -ENOENT)
pr_info("pool %s does not exist\n", spec->pool_name);
goto err_out_client;
}
spec->pool_id = (u64)rc;

/* The ceph file layout needs to fit pool id in 32 bits */
Expand Down Expand Up @@ -5673,7 +5687,7 @@ static int __init rbd_init(void)

/*
* The number of active work items is limited by the number of
* rbd devices, so leave @max_active at default.
* rbd devices * queue depth, so leave @max_active at default.
*/
rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0);
if (!rbd_wq) {
Expand Down
38 changes: 25 additions & 13 deletions fs/ceph/addr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1146,6 +1146,10 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
inode, page, (int)pos, (int)len);

r = ceph_update_writeable_page(file, pos, len, page);
if (r < 0)
page_cache_release(page);
else
*pagep = page;
} while (r == -EAGAIN);

return r;
Expand Down Expand Up @@ -1534,19 +1538,27 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)

osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);

err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
"inline_version", &inline_version,
sizeof(inline_version),
CEPH_OSD_CMPXATTR_OP_GT,
CEPH_OSD_CMPXATTR_MODE_U64);
if (err)
goto out_put;

err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
"inline_version", &inline_version,
sizeof(inline_version), 0, 0);
if (err)
goto out_put;
{
__le64 xattr_buf = cpu_to_le64(inline_version);
err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
"inline_version", &xattr_buf,
sizeof(xattr_buf),
CEPH_OSD_CMPXATTR_OP_GT,
CEPH_OSD_CMPXATTR_MODE_U64);
if (err)
goto out_put;
}

{
char xattr_buf[32];
int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf),
"%llu", inline_version);
err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
"inline_version",
xattr_buf, xattr_len, 0, 0);
if (err)
goto out_put;
}

ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
Expand Down
51 changes: 41 additions & 10 deletions fs/ceph/caps.c
Original file line number Diff line number Diff line change
Expand Up @@ -896,6 +896,18 @@ int ceph_is_any_caps(struct inode *inode)
return ret;
}

static void drop_inode_snap_realm(struct ceph_inode_info *ci)
{
struct ceph_snap_realm *realm = ci->i_snap_realm;
spin_lock(&realm->inodes_with_caps_lock);
list_del_init(&ci->i_snap_realm_item);
ci->i_snap_realm_counter++;
ci->i_snap_realm = NULL;
spin_unlock(&realm->inodes_with_caps_lock);
ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
realm);
}

/*
* Remove a cap. Take steps to deal with a racing iterate_session_caps.
*
Expand Down Expand Up @@ -946,15 +958,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
if (removed)
ceph_put_cap(mdsc, cap);

if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
struct ceph_snap_realm *realm = ci->i_snap_realm;
spin_lock(&realm->inodes_with_caps_lock);
list_del_init(&ci->i_snap_realm_item);
ci->i_snap_realm_counter++;
ci->i_snap_realm = NULL;
spin_unlock(&realm->inodes_with_caps_lock);
ceph_put_snap_realm(mdsc, realm);
}
/* when reconnect denied, we remove session caps forcibly,
* i_wr_ref can be non-zero. If there are ongoing write,
* keep i_snap_realm.
*/
if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm)
drop_inode_snap_realm(ci);

if (!__ceph_is_any_real_caps(ci))
__cap_delay_cancel(mdsc, ci);
}
Expand Down Expand Up @@ -1394,6 +1404,13 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
int was = ci->i_dirty_caps;
int dirty = 0;

if (!ci->i_auth_cap) {
pr_warn("__mark_dirty_caps %p %llx mask %s, "
"but no auth cap (session was closed?)\n",
inode, ceph_ino(inode), ceph_cap_string(mask));
return 0;
}

dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
ceph_cap_string(mask), ceph_cap_string(was),
ceph_cap_string(was | mask));
Expand All @@ -1404,7 +1421,6 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
ci->i_snap_realm->cached_context);
dout(" inode %p now dirty snapc %p auth cap %p\n",
&ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
WARN_ON(!ci->i_auth_cap);
BUG_ON(!list_empty(&ci->i_dirty_item));
spin_lock(&mdsc->cap_dirty_lock);
list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
Expand Down Expand Up @@ -1545,7 +1561,19 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
if (!mdsc->stopping && inode->i_nlink > 0) {
if (want) {
retain |= CEPH_CAP_ANY; /* be greedy */
} else if (S_ISDIR(inode->i_mode) &&
(issued & CEPH_CAP_FILE_SHARED) &&
__ceph_dir_is_complete(ci)) {
/*
* If a directory is complete, we want to keep
* the exclusive cap. So that MDS does not end up
* revoking the shared cap on every create/unlink
* operation.
*/
want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
retain |= want;
} else {

retain |= CEPH_CAP_ANY_SHARED;
/*
* keep RD only if we didn't have the file open RW,
Expand Down Expand Up @@ -2309,6 +2337,9 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
wake = 1;
}
}
/* see comment in __ceph_remove_cap() */
if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
drop_inode_snap_realm(ci);
}
spin_unlock(&ci->i_ceph_lock);

Expand Down
48 changes: 34 additions & 14 deletions fs/ceph/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* can we use the dcache? */
spin_lock(&ci->i_ceph_lock);
if ((ctx->pos == 2 || fi->dentry) &&
ceph_test_mount_opt(fsc, DCACHE) &&
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR &&
__ceph_dir_is_complete_ordered(ci) &&
Expand Down Expand Up @@ -336,16 +337,23 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
ceph_mdsc_put_request(req);
return err;
}
req->r_inode = inode;
ihold(inode);
req->r_dentry = dget(file->f_path.dentry);
/* hints to request -> mds selection code */
req->r_direct_mode = USE_AUTH_MDS;
req->r_direct_hash = ceph_frag_value(frag);
req->r_direct_is_hash = true;
req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
if (fi->last_name) {
req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
if (!req->r_path2) {
ceph_mdsc_put_request(req);
return -ENOMEM;
}
}
req->r_readdir_offset = fi->next_offset;
req->r_args.readdir.frag = cpu_to_le32(frag);

req->r_inode = inode;
ihold(inode);
req->r_dentry = dget(file->f_path.dentry);
err = ceph_mdsc_do_request(mdsc, NULL, req);
if (err < 0) {
ceph_mdsc_put_request(req);
Expand Down Expand Up @@ -629,6 +637,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
fsc->mount_options->snapdir_name,
dentry->d_name.len) &&
!is_root_ceph_dentry(dir, dentry) &&
ceph_test_mount_opt(fsc, DCACHE) &&
__ceph_dir_is_complete(ci) &&
(__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
spin_unlock(&ci->i_ceph_lock);
Expand Down Expand Up @@ -755,10 +764,15 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
err = PTR_ERR(req);
goto out;
}
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_path2 = kstrdup(dest, GFP_NOFS);
if (!req->r_path2) {
err = -ENOMEM;
ceph_mdsc_put_request(req);
goto out;
}
req->r_locked_dir = dir;
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
err = ceph_mdsc_do_request(mdsc, dir, req);
Expand Down Expand Up @@ -933,16 +947,20 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
int op = CEPH_MDS_OP_RENAME;
int err;

if (ceph_snap(old_dir) != ceph_snap(new_dir))
return -EXDEV;
if (ceph_snap(old_dir) != CEPH_NOSNAP ||
ceph_snap(new_dir) != CEPH_NOSNAP)
return -EROFS;
if (ceph_snap(old_dir) != CEPH_NOSNAP) {
if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR)
op = CEPH_MDS_OP_RENAMESNAP;
else
return -EROFS;
}
dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
ihold(old_dir);
Expand Down Expand Up @@ -1240,11 +1258,12 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
dout("dir_fsync %p wait on tid %llu (until %llu)\n",
inode, req->r_tid, last_tid);
if (req->r_timeout) {
ret = wait_for_completion_timeout(
&req->r_safe_completion, req->r_timeout);
if (ret > 0)
unsigned long time_left = wait_for_completion_timeout(
&req->r_safe_completion,
req->r_timeout);
if (time_left > 0)
ret = 0;
else if (ret == 0)
else
ret = -EIO; /* timed out */
} else {
wait_for_completion(&req->r_safe_completion);
Expand Down Expand Up @@ -1372,6 +1391,7 @@ const struct inode_operations ceph_snapdir_iops = {
.getattr = ceph_getattr,
.mkdir = ceph_mkdir,
.rmdir = ceph_unlink,
.rename = ceph_rename,
};

const struct dentry_operations ceph_dentry_ops = {
Expand Down
Loading

0 comments on commit 1204c46

Please sign in to comment.