Skip to content

Commit

Permalink
Merge tag 'ceph-for-6.10-rc1' of https://github.com/ceph/ceph-client
Browse files Browse the repository at this point in the history
Pull ceph updates from Ilya Dryomov:
 "A series from Xiubo that adds support for additional access checks
  based on MDS auth caps which were recently made available to clients.

  This is needed to prevent scenarios where the MDS quietly discards
  updates that a UID-restricted client previously (wrongfully) acked to
  the user.

  Other than that, just a documentation fixup"

* tag 'ceph-for-6.10-rc1' of https://github.com/ceph/ceph-client:
  doc: ceph: update userspace command to get CephFS metadata
  ceph: add CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK feature bit
  ceph: check the cephx mds auth access for async dirop
  ceph: check the cephx mds auth access for open
  ceph: check the cephx mds auth access for setattr
  ceph: add ceph_mds_check_access() helper
  ceph: save cap_auths in MDS client when session is opened
  • Loading branch information
torvalds committed May 25, 2024
2 parents 89b61ca + 93a2221 commit 74eca35
Show file tree
Hide file tree
Showing 6 changed files with 434 additions and 19 deletions.
15 changes: 9 additions & 6 deletions Documentation/filesystems/ceph.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,15 @@ Snapshot names have two limitations:
more than 255 characters, and `<node-id>` takes 13 characters, the long
snapshot names can take as much as 255 - 1 - 1 - 13 = 240.

Ceph also provides some recursive accounting on directories for nested
files and bytes. That is, a 'getfattr -d foo' on any directory in the
system will reveal the total number of nested regular files and
subdirectories, and a summation of all nested file sizes. This makes
the identification of large disk space consumers relatively quick, as
no 'du' or similar recursive scan of the file system is required.
Ceph also provides some recursive accounting on directories for nested files
and bytes. You can run the commands::

  getfattr -n ceph.dir.rfiles /some/dir
  getfattr -n ceph.dir.rbytes /some/dir

to get the total number of nested files and their combined size, respectively.
This makes the identification of large disk space consumers relatively quick,
as no 'du' or similar recursive scan of the file system is required.

Finally, Ceph also allows quotas to be set on any directory in the system.
The quota can restrict the number of bytes or the number of files stored
Expand Down
28 changes: 28 additions & 0 deletions fs/ceph/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -1336,8 +1336,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
struct inode *inode = d_inode(dentry);
struct ceph_mds_request *req;
bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
struct dentry *dn;
int err = -EROFS;
int op;
char *path;
int pathlen;
u64 pathbase;

if (ceph_snap(dir) == CEPH_SNAPDIR) {
/* rmdir .snap/foo is RMSNAP */
Expand All @@ -1351,6 +1355,30 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
} else
goto out;

dn = d_find_alias(dir);
if (!dn) {
try_async = false;
} else {
path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
if (IS_ERR(path)) {
try_async = false;
err = 0;
} else {
err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
}
ceph_mdsc_free_path(path, pathlen);
dput(dn);

/* For errors other than -EACCES, skip the async path and let the MDS do the auth check */
if (err == -EACCES) {
return err;
} else if (err < 0) {
try_async = false;
err = 0;
}
}

retry:
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
if (IS_ERR(req)) {
Expand Down
66 changes: 64 additions & 2 deletions fs/ceph/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,12 @@ int ceph_open(struct inode *inode, struct file *file)
struct ceph_file_info *fi = file->private_data;
int err;
int flags, fmode, wanted;
struct dentry *dentry;
char *path;
int pathlen;
u64 pathbase;
bool do_sync = false;
int mask = MAY_READ;

if (fi) {
doutc(cl, "file %p is already opened\n", file);
Expand All @@ -387,6 +393,31 @@ int ceph_open(struct inode *inode, struct file *file)
fmode = ceph_flags_to_mode(flags);
wanted = ceph_caps_for_mode(fmode);

if (fmode & CEPH_FILE_MODE_WR)
mask |= MAY_WRITE;
dentry = d_find_alias(inode);
if (!dentry) {
do_sync = true;
} else {
path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
if (IS_ERR(path)) {
do_sync = true;
err = 0;
} else {
err = ceph_mds_check_access(mdsc, path, mask);
}
ceph_mdsc_free_path(path, pathlen);
dput(dentry);

/* For errors other than -EACCES, force a sync open and let the MDS do the auth check */
if (err == -EACCES) {
return err;
} else if (err < 0) {
do_sync = true;
err = 0;
}
}

/* snapped files are read-only */
if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE))
return -EROFS;
Expand All @@ -402,7 +433,7 @@ int ceph_open(struct inode *inode, struct file *file)
* asynchronously.
*/
spin_lock(&ci->i_ceph_lock);
if (__ceph_is_any_real_caps(ci) &&
if (!do_sync && __ceph_is_any_real_caps(ci) &&
(((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
int mds_wanted = __ceph_caps_mds_wanted(ci, true);
int issued = __ceph_caps_issued(ci, NULL);
Expand All @@ -420,7 +451,7 @@ int ceph_open(struct inode *inode, struct file *file)
ceph_check_caps(ci, 0);

return ceph_init_file(inode, file, fmode);
} else if (ceph_snap(inode) != CEPH_NOSNAP &&
} else if (!do_sync && ceph_snap(inode) != CEPH_NOSNAP &&
(ci->i_snap_caps & wanted) == wanted) {
__ceph_touch_fmode(ci, mdsc, fmode);
spin_unlock(&ci->i_ceph_lock);
Expand Down Expand Up @@ -759,6 +790,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
int mask;
int err;
char *path;
int pathlen;
u64 pathbase;

doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n",
dir, ceph_vinop(dir), dentry, dentry,
Expand All @@ -776,6 +810,34 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
*/
flags &= ~O_TRUNC;

dn = d_find_alias(dir);
if (!dn) {
try_async = false;
} else {
path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
if (IS_ERR(path)) {
try_async = false;
err = 0;
} else {
int fmode = ceph_flags_to_mode(flags);

mask = MAY_READ;
if (fmode & CEPH_FILE_MODE_WR)
mask |= MAY_WRITE;
err = ceph_mds_check_access(mdsc, path, mask);
}
ceph_mdsc_free_path(path, pathlen);
dput(dn);

/* For errors other than -EACCES, skip the async path and let the MDS do the auth check */
if (err == -EACCES) {
return err;
} else if (err < 0) {
try_async = false;
err = 0;
}
}

retry:
if (flags & O_CREAT) {
if (ceph_quota_is_max_files_exceeded(dir))
Expand Down
46 changes: 37 additions & 9 deletions fs/ceph/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -2482,6 +2482,34 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
bool lock_snap_rwsem = false;
bool fill_fscrypt;
int truncate_retry = 20; /* The RMW will take around 50ms */
struct dentry *dentry;
char *path;
int pathlen;
u64 pathbase;
bool do_sync = false;

dentry = d_find_alias(inode);
if (!dentry) {
do_sync = true;
} else {
path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
if (IS_ERR(path)) {
do_sync = true;
err = 0;
} else {
err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
}
ceph_mdsc_free_path(path, pathlen);
dput(dentry);

/* For errors other than -EACCES, force a sync setattr and let the MDS do the auth check */
if (err == -EACCES) {
return err;
} else if (err < 0) {
do_sync = true;
err = 0;
}
}

retry:
prealloc_cf = ceph_alloc_cap_flush();
Expand Down Expand Up @@ -2528,7 +2556,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
/* It should never be re-set once set */
WARN_ON_ONCE(ci->fscrypt_auth);

if (issued & CEPH_CAP_AUTH_EXCL) {
if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) {
dirtied |= CEPH_CAP_AUTH_EXCL;
kfree(ci->fscrypt_auth);
ci->fscrypt_auth = (u8 *)cia->fscrypt_auth;
Expand Down Expand Up @@ -2557,7 +2585,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
ceph_vinop(inode),
from_kuid(&init_user_ns, inode->i_uid),
from_kuid(&init_user_ns, attr->ia_uid));
if (issued & CEPH_CAP_AUTH_EXCL) {
if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) {
inode->i_uid = fsuid;
dirtied |= CEPH_CAP_AUTH_EXCL;
} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
Expand All @@ -2575,7 +2603,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
ceph_vinop(inode),
from_kgid(&init_user_ns, inode->i_gid),
from_kgid(&init_user_ns, attr->ia_gid));
if (issued & CEPH_CAP_AUTH_EXCL) {
if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) {
inode->i_gid = fsgid;
dirtied |= CEPH_CAP_AUTH_EXCL;
} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
Expand All @@ -2589,7 +2617,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
if (ia_valid & ATTR_MODE) {
doutc(cl, "%p %llx.%llx mode 0%o -> 0%o\n", inode,
ceph_vinop(inode), inode->i_mode, attr->ia_mode);
if (issued & CEPH_CAP_AUTH_EXCL) {
if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) {
inode->i_mode = attr->ia_mode;
dirtied |= CEPH_CAP_AUTH_EXCL;
} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
Expand All @@ -2608,11 +2636,11 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
inode, ceph_vinop(inode),
atime.tv_sec, atime.tv_nsec,
attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
if (issued & CEPH_CAP_FILE_EXCL) {
if (!do_sync && (issued & CEPH_CAP_FILE_EXCL)) {
ci->i_time_warp_seq++;
inode_set_atime_to_ts(inode, attr->ia_atime);
dirtied |= CEPH_CAP_FILE_EXCL;
} else if ((issued & CEPH_CAP_FILE_WR) &&
} else if (!do_sync && (issued & CEPH_CAP_FILE_WR) &&
timespec64_compare(&atime,
&attr->ia_atime) < 0) {
inode_set_atime_to_ts(inode, attr->ia_atime);
Expand Down Expand Up @@ -2648,7 +2676,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
CEPH_FSCRYPT_BLOCK_SIZE));
req->r_fscrypt_file = attr->ia_size;
fill_fscrypt = true;
} else if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) {
} else if (!do_sync && (issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) {
if (attr->ia_size > isize) {
i_size_write(inode, attr->ia_size);
inode->i_blocks = calc_inode_blocks(attr->ia_size);
Expand Down Expand Up @@ -2685,11 +2713,11 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
inode, ceph_vinop(inode),
mtime.tv_sec, mtime.tv_nsec,
attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
if (issued & CEPH_CAP_FILE_EXCL) {
if (!do_sync && (issued & CEPH_CAP_FILE_EXCL)) {
ci->i_time_warp_seq++;
inode_set_mtime_to_ts(inode, attr->ia_mtime);
dirtied |= CEPH_CAP_FILE_EXCL;
} else if ((issued & CEPH_CAP_FILE_WR) &&
} else if (!do_sync && (issued & CEPH_CAP_FILE_WR) &&
timespec64_compare(&mtime, &attr->ia_mtime) < 0) {
inode_set_mtime_to_ts(inode, attr->ia_mtime);
dirtied |= CEPH_CAP_FILE_WR;
Expand Down
Loading

0 comments on commit 74eca35

Please sign in to comment.