From 8e6c848eceaa38a7e0192953b08162467e51f852 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Dec 2017 17:12:45 -0500 Subject: [PATCH 1/9] new primitive: vfs_mkobj() Similar to vfs_create(), but with caller-supplied callback (and argument for it) to be used instead of ->create(). Signed-off-by: Al Viro --- fs/namei.c | 21 +++++++++++++++++++++ include/linux/fs.h | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index 9cc91fb7f15654..1c0fb97c94251e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2898,6 +2898,27 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, } EXPORT_SYMBOL(vfs_create); +int vfs_mkobj(struct dentry *dentry, umode_t mode, + int (*f)(struct dentry *, umode_t, void *), + void *arg) +{ + struct inode *dir = dentry->d_parent->d_inode; + int error = may_create(dir, dentry); + if (error) + return error; + + mode &= S_IALLUGO; + mode |= S_IFREG; + error = security_inode_create(dir, dentry, mode); + if (error) + return error; + error = f(dentry, mode, arg); + if (!error) + fsnotify_create(dir, dentry); + return error; +} +EXPORT_SYMBOL(vfs_mkobj); + bool may_open_dev(const struct path *path) { return !(path->mnt->mnt_flags & MNT_NODEV) && diff --git a/include/linux/fs.h b/include/linux/fs.h index 511fbaabf6248b..aad23d4fae47fc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1608,6 +1608,10 @@ extern int vfs_whiteout(struct inode *, struct dentry *); extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag); +int vfs_mkobj(struct dentry *, umode_t, + int (*f)(struct dentry *, umode_t, void *), + void *); + /* * VFS file helper functions. 
*/ From a4a0683fd5e64e029421a465525352f01d57f27a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Dec 2017 17:22:19 -0500 Subject: [PATCH 2/9] bpf_obj_do_pin(): switch to vfs_mkobj(), quit abusing ->mknod() Signed-off-by: Al Viro --- kernel/bpf/inode.c | 50 ++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 01aaef1a77c5af..2b75faccc7718b 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -150,39 +150,29 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return 0; } -static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, - umode_t mode, const struct inode_operations *iops) +static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, + const struct inode_operations *iops) { - struct inode *inode; - - inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); + struct inode *dir = dentry->d_parent->d_inode; + struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = iops; - inode->i_private = dentry->d_fsdata; + inode->i_private = raw; bpf_dentry_finalize(dentry, inode, dir); return 0; } -static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, - dev_t devt) +static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg) { - enum bpf_type type = MINOR(devt); - - if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) || - dentry->d_fsdata == NULL) - return -EPERM; + return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops); +} - switch (type) { - case BPF_TYPE_PROG: - return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops); - case BPF_TYPE_MAP: - return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops); - default: - return -EPERM; - } +static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) +{ + return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops); } static struct dentry * @@ -218,7 +208,6 @@ static int 
bpf_symlink(struct inode *dir, struct dentry *dentry, static const struct inode_operations bpf_dir_iops = { .lookup = bpf_lookup, - .mknod = bpf_mkobj, .mkdir = bpf_mkdir, .symlink = bpf_symlink, .rmdir = simple_rmdir, @@ -234,7 +223,6 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw, struct inode *dir; struct path path; umode_t mode; - dev_t devt; int ret; dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); @@ -242,9 +230,8 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw, return PTR_ERR(dentry); mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); - devt = MKDEV(UNNAMED_MAJOR, type); - ret = security_path_mknod(&path, dentry, mode, devt); + ret = security_path_mknod(&path, dentry, mode, 0); if (ret) goto out; @@ -254,9 +241,16 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw, goto out; } - dentry->d_fsdata = raw; - ret = vfs_mknod(dir, dentry, mode, devt); - dentry->d_fsdata = NULL; + switch (type) { + case BPF_TYPE_PROG: + ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw); + break; + case BPF_TYPE_MAP: + ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw); + break; + default: + ret = -EPERM; + } out: done_path_create(&path, dentry); return ret; From eecec19d9e705cb670a387d18973feeffd412970 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Dec 2017 17:26:05 -0500 Subject: [PATCH 3/9] mqueue: switch to vfs_mkobj(), quit abusing ->d_fsdata Signed-off-by: Al Viro --- ipc/mqueue.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 9649ecd8a73a70..e011ccd4c798d6 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -416,11 +416,11 @@ static void mqueue_evict_inode(struct inode *inode) put_ipc_ns(ipc_ns); } -static int mqueue_create(struct inode *dir, struct dentry *dentry, - umode_t mode, bool excl) +static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg) { + struct inode *dir = dentry->d_parent->d_inode; 
struct inode *inode; - struct mq_attr *attr = dentry->d_fsdata; + struct mq_attr *attr = arg; int error; struct ipc_namespace *ipc_ns; @@ -461,6 +461,12 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry, return error; } +static int mqueue_create(struct inode *dir, struct dentry *dentry, + umode_t mode, bool excl) +{ + return mqueue_create_attr(dentry, mode, NULL); +} + static int mqueue_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); @@ -732,8 +738,6 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, ret = mq_attr_ok(ipc_ns, attr); if (ret) return ERR_PTR(ret); - /* store for use during create */ - path->dentry->d_fsdata = attr; } else { struct mq_attr def_attr; @@ -746,9 +750,8 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, return ERR_PTR(ret); } - mode &= ~current_umask(); - ret = vfs_create(dir, path->dentry, mode, true); - path->dentry->d_fsdata = NULL; + ret = vfs_mkobj(path->dentry, mode & ~current_umask(), + mqueue_create_attr, attr); if (ret) return ERR_PTR(ret); return dentry_open(path, oflag, cred); From af4a5372e4166e8f13cb5cd36d936a0f56bc8418 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Dec 2017 17:34:22 -0500 Subject: [PATCH 4/9] move dentry_open() calls up into do_mq_open() Signed-off-by: Al Viro --- ipc/mqueue.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index e011ccd4c798d6..fee67559acbe42 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -727,17 +727,16 @@ static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr) /* * Invoked when creating a new queue via sys_mq_open */ -static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, +static int do_create(struct ipc_namespace *ipc_ns, struct inode *dir, struct path *path, int oflag, umode_t mode, struct mq_attr *attr) { - const struct cred 
*cred = current_cred(); int ret; if (attr) { ret = mq_attr_ok(ipc_ns, attr); if (ret) - return ERR_PTR(ret); + return ret; } else { struct mq_attr def_attr; @@ -747,28 +746,23 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, ipc_ns->mq_msgsize_default); ret = mq_attr_ok(ipc_ns, &def_attr); if (ret) - return ERR_PTR(ret); + return ret; } - ret = vfs_mkobj(path->dentry, mode & ~current_umask(), + return vfs_mkobj(path->dentry, mode & ~current_umask(), mqueue_create_attr, attr); - if (ret) - return ERR_PTR(ret); - return dentry_open(path, oflag, cred); } /* Opens existing queue */ -static struct file *do_open(struct path *path, int oflag) +static int do_open(struct path *path, int oflag) { static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, MAY_READ | MAY_WRITE }; int acc; if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) - return ERR_PTR(-EINVAL); + return -EINVAL; acc = oflag2acc[oflag & O_ACCMODE]; - if (inode_permission(d_inode(path->dentry), acc)) - return ERR_PTR(-EACCES); - return dentry_open(path, oflag, current_cred()); + return inode_permission(d_inode(path->dentry), acc); } static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, @@ -805,28 +799,30 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, if (oflag & O_CREAT) { if (d_really_is_positive(path.dentry)) { /* entry already exists */ audit_inode(name, path.dentry, 0); - if (oflag & O_EXCL) { + if (oflag & O_EXCL) error = -EEXIST; - goto out; - } - filp = do_open(&path, oflag); + else + error = do_open(&path, oflag); } else { if (ro) { error = ro; - goto out; + } else { + audit_inode_parent_hidden(name, root); + error = do_create(ipc_ns, d_inode(root), &path, + oflag, mode, attr); } - audit_inode_parent_hidden(name, root); - filp = do_create(ipc_ns, d_inode(root), &path, - oflag, mode, attr); } } else { if (d_really_is_negative(path.dentry)) { error = -ENOENT; - goto out; + } else { + audit_inode(name, path.dentry, 0); + 
error = do_open(&path, oflag); } - audit_inode(name, path.dentry, 0); - filp = do_open(&path, oflag); } + if (error) + goto out; + filp = dentry_open(&path, oflag, current_cred()); if (!IS_ERR(filp)) fd_install(fd, filp); From 05c1b29038acb05a52dc113ae2ab646f9a92bbfd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Dec 2017 17:43:43 -0500 Subject: [PATCH 5/9] mqueue: fold mq_attr_ok() into mqueue_get_inode() Signed-off-by: Al Viro --- ipc/mqueue.c | 71 ++++++++++++++++------------------------------------ 1 file changed, 21 insertions(+), 50 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index fee67559acbe42..a2483524b6333e 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -270,13 +270,30 @@ static struct inode *mqueue_get_inode(struct super_block *sb, * that means the min(mq_maxmsg, max_priorities) * struct * posix_msg_tree_node. */ + + ret = -EINVAL; + if (info->attr.mq_maxmsg <= 0 || info->attr.mq_msgsize <= 0) + goto out_inode; + if (capable(CAP_SYS_RESOURCE)) { + if (info->attr.mq_maxmsg > HARD_MSGMAX || + info->attr.mq_msgsize > HARD_MSGSIZEMAX) + goto out_inode; + } else { + if (info->attr.mq_maxmsg > ipc_ns->mq_msg_max || + info->attr.mq_msgsize > ipc_ns->mq_msgsize_max) + goto out_inode; + } + ret = -EOVERFLOW; + /* check for overflow */ + if (info->attr.mq_msgsize > ULONG_MAX/info->attr.mq_maxmsg) + goto out_inode; mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * sizeof(struct posix_msg_tree_node); - - mq_bytes = mq_treesize + (info->attr.mq_maxmsg * - info->attr.mq_msgsize); - + mq_bytes = info->attr.mq_maxmsg * info->attr.mq_msgsize; + if (mq_bytes + mq_treesize < mq_bytes) + goto out_inode; + mq_bytes += mq_treesize; spin_lock(&mq_lock); if (u->mq_bytes + mq_bytes < u->mq_bytes || u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) { @@ -696,34 +713,6 @@ static void remove_notification(struct mqueue_inode_info *info) info->notify_user_ns = NULL; } -static int mq_attr_ok(struct 
ipc_namespace *ipc_ns, struct mq_attr *attr) -{ - int mq_treesize; - unsigned long total_size; - - if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0) - return -EINVAL; - if (capable(CAP_SYS_RESOURCE)) { - if (attr->mq_maxmsg > HARD_MSGMAX || - attr->mq_msgsize > HARD_MSGSIZEMAX) - return -EINVAL; - } else { - if (attr->mq_maxmsg > ipc_ns->mq_msg_max || - attr->mq_msgsize > ipc_ns->mq_msgsize_max) - return -EINVAL; - } - /* check for overflow */ - if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg) - return -EOVERFLOW; - mq_treesize = attr->mq_maxmsg * sizeof(struct msg_msg) + - min_t(unsigned int, attr->mq_maxmsg, MQ_PRIO_MAX) * - sizeof(struct posix_msg_tree_node); - total_size = attr->mq_maxmsg * attr->mq_msgsize; - if (total_size + mq_treesize < total_size) - return -EOVERFLOW; - return 0; -} - /* * Invoked when creating a new queue via sys_mq_open */ @@ -731,24 +720,6 @@ static int do_create(struct ipc_namespace *ipc_ns, struct inode *dir, struct path *path, int oflag, umode_t mode, struct mq_attr *attr) { - int ret; - - if (attr) { - ret = mq_attr_ok(ipc_ns, attr); - if (ret) - return ret; - } else { - struct mq_attr def_attr; - - def_attr.mq_maxmsg = min(ipc_ns->mq_msg_max, - ipc_ns->mq_msg_default); - def_attr.mq_msgsize = min(ipc_ns->mq_msgsize_max, - ipc_ns->mq_msgsize_default); - ret = mq_attr_ok(ipc_ns, &def_attr); - if (ret) - return ret; - } - return vfs_mkobj(path->dentry, mode & ~current_umask(), mqueue_create_attr, attr); } From 066cc813e94a344ae4482af310d324739955c3b5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Dec 2017 17:51:39 -0500 Subject: [PATCH 6/9] do_mq_open(): move all work prior to dentry_open() into a helper Signed-off-by: Al Viro --- ipc/mqueue.c | 77 +++++++++++++++++++++------------------------------- 1 file changed, 31 insertions(+), 46 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index a2483524b6333e..e8a872e9c8083e 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -713,34 +713,44 @@ static void 
remove_notification(struct mqueue_inode_info *info) info->notify_user_ns = NULL; } -/* - * Invoked when creating a new queue via sys_mq_open - */ -static int do_create(struct ipc_namespace *ipc_ns, struct inode *dir, - struct path *path, int oflag, umode_t mode, +static int prepare_open(struct dentry *dentry, int oflag, int ro, + umode_t mode, struct filename *name, struct mq_attr *attr) -{ - return vfs_mkobj(path->dentry, mode & ~current_umask(), - mqueue_create_attr, attr); -} - -/* Opens existing queue */ -static int do_open(struct path *path, int oflag) { static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, MAY_READ | MAY_WRITE }; int acc; + + if (oflag & O_CREAT) { + if (d_really_is_positive(dentry)) { /* entry already exists */ + audit_inode(name, dentry, 0); + if (oflag & O_EXCL) + return -EEXIST; + } else { + if (ro) + return ro; + + audit_inode_parent_hidden(name, dentry->d_parent); + return vfs_mkobj(dentry, mode & ~current_umask(), + mqueue_create_attr, attr); + } + } else { + if (d_really_is_negative(dentry)) { + return -ENOENT; + } else { + audit_inode(name, dentry, 0); + } + } if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) return -EINVAL; acc = oflag2acc[oflag & O_ACCMODE]; - return inode_permission(d_inode(path->dentry), acc); + return inode_permission(d_inode(dentry), acc); } static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, struct mq_attr *attr) { struct path path; - struct file *filp; struct filename *name; int fd, error; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; @@ -767,39 +777,14 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, } path.mnt = mntget(mnt); - if (oflag & O_CREAT) { - if (d_really_is_positive(path.dentry)) { /* entry already exists */ - audit_inode(name, path.dentry, 0); - if (oflag & O_EXCL) - error = -EEXIST; - else - error = do_open(&path, oflag); - } else { - if (ro) { - error = ro; - } else { - audit_inode_parent_hidden(name, root); - error = 
do_create(ipc_ns, d_inode(root), &path, - oflag, mode, attr); - } - } - } else { - if (d_really_is_negative(path.dentry)) { - error = -ENOENT; - } else { - audit_inode(name, path.dentry, 0); - error = do_open(&path, oflag); - } + error = prepare_open(path.dentry, oflag, ro, mode, name, attr); + if (!error) { + struct file *file = dentry_open(&path, oflag, current_cred()); + if (!IS_ERR(file)) + fd_install(fd, file); + else + error = PTR_ERR(file); } - if (error) - goto out; - filp = dentry_open(&path, oflag, current_cred()); - - if (!IS_ERR(filp)) - fd_install(fd, filp); - else - error = PTR_ERR(filp); -out: path_put(&path); out_putfd: if (error) { From 9b20d7fc5250f50660c4dd4bea52e54e373b678d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Dec 2017 17:57:02 -0500 Subject: [PATCH 7/9] mqueue: clean prepare_open() up Signed-off-by: Al Viro --- ipc/mqueue.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index e8a872e9c8083e..ce498ff906bb69 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -721,26 +721,19 @@ static int prepare_open(struct dentry *dentry, int oflag, int ro, MAY_READ | MAY_WRITE }; int acc; - if (oflag & O_CREAT) { - if (d_really_is_positive(dentry)) { /* entry already exists */ - audit_inode(name, dentry, 0); - if (oflag & O_EXCL) - return -EEXIST; - } else { - if (ro) - return ro; - - audit_inode_parent_hidden(name, dentry->d_parent); - return vfs_mkobj(dentry, mode & ~current_umask(), - mqueue_create_attr, attr); - } - } else { - if (d_really_is_negative(dentry)) { + if (d_really_is_negative(dentry)) { + if (!(oflag & O_CREAT)) return -ENOENT; - } else { - audit_inode(name, dentry, 0); - } + if (ro) + return ro; + audit_inode_parent_hidden(name, dentry->d_parent); + return vfs_mkobj(dentry, mode & ~current_umask(), + mqueue_create_attr, attr); } + /* it already existed */ + audit_inode(name, dentry, 0); + if ((oflag & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + return 
-EEXIST; if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) return -EINVAL; acc = oflag2acc[oflag & O_ACCMODE]; From a713fd7f529e361d42aa9dcc511e3fd3016599fe Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Dec 2017 18:01:09 -0500 Subject: [PATCH 8/9] tidy do_mq_open() up a bit Signed-off-by: Al Viro --- ipc/mqueue.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index ce498ff906bb69..9f05837a86a644 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -743,12 +743,11 @@ static int prepare_open(struct dentry *dentry, int oflag, int ro, static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, struct mq_attr *attr) { - struct path path; + struct vfsmount *mnt = current->nsproxy->ipc_ns->mq_mnt; + struct dentry *root = mnt->mnt_root; struct filename *name; + struct path path; int fd, error; - struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; - struct vfsmount *mnt = ipc_ns->mq_mnt; - struct dentry *root = mnt->mnt_root; int ro; audit_mq_open(oflag, mode, attr); @@ -761,7 +760,6 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, goto out_putname; ro = mnt_want_write(mnt); /* we'll drop it in any case */ - error = 0; inode_lock(d_inode(root)); path.dentry = lookup_one_len(name->name, root, strlen(name->name)); if (IS_ERR(path.dentry)) { @@ -769,7 +767,6 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, goto out_putfd; } path.mnt = mntget(mnt); - error = prepare_open(path.dentry, oflag, ro, mode, name, attr); if (!error) { struct file *file = dentry_open(&path, oflag, current_cred()); From 36735a6a2b5e042db1af956ce4bcc13f3ff99e21 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Dec 2017 19:43:35 -0500 Subject: [PATCH 9/9] mqueue: switch to on-demand creation of internal mount Instead of doing that upon each ipcns creation, we do that the first time mq_open(2) or mqueue mount is done in an ipcns. 
What's more, doing that allows us to get rid of mount_ns() use - we can go with considerably cheaper mount_nodev(), avoiding the loop over all mqueue superblock instances; ipcns->mq_mnt is used to locate preexisting instance in O(1) time instead of O(instances) mount_ns() would've cost us. Based upon the version by Giuseppe Scrivano; I've added handling of userland mqueue mounts (original had been broken in that area) and added a switch to mount_nodev(). Signed-off-by: Al Viro --- ipc/mqueue.c | 74 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 9f05837a86a644..738579191fd22c 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -325,8 +325,9 @@ static struct inode *mqueue_get_inode(struct super_block *sb, static int mqueue_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; - struct ipc_namespace *ns = sb->s_fs_info; + struct ipc_namespace *ns = data; + sb->s_fs_info = ns; sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; @@ -343,18 +344,44 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) return 0; } +static struct file_system_type mqueue_fs_type; +/* + * Return value is pinned only by reference in ->mq_mnt; it will + * live until ipcns dies. Caller does not need to drop it. 
+ */ +static struct vfsmount *mq_internal_mount(void) +{ + struct ipc_namespace *ns = current->nsproxy->ipc_ns; + struct vfsmount *m = ns->mq_mnt; + if (m) + return m; + m = kern_mount_data(&mqueue_fs_type, ns); + spin_lock(&mq_lock); + if (unlikely(ns->mq_mnt)) { + spin_unlock(&mq_lock); + if (!IS_ERR(m)) + kern_unmount(m); + return ns->mq_mnt; + } + if (!IS_ERR(m)) + ns->mq_mnt = m; + spin_unlock(&mq_lock); + return m; +} + static struct dentry *mqueue_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct ipc_namespace *ns; - if (flags & SB_KERNMOUNT) { - ns = data; - data = NULL; - } else { - ns = current->nsproxy->ipc_ns; - } - return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super); + struct vfsmount *m; + if (flags & SB_KERNMOUNT) + return mount_nodev(fs_type, flags, data, mqueue_fill_super); + m = mq_internal_mount(); + if (IS_ERR(m)) + return ERR_CAST(m); + atomic_inc(&m->mnt_sb->s_active); + down_write(&m->mnt_sb->s_umount); + return dget(m->mnt_root); } static void init_once(void *foo) @@ -743,13 +770,16 @@ static int prepare_open(struct dentry *dentry, int oflag, int ro, static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, struct mq_attr *attr) { - struct vfsmount *mnt = current->nsproxy->ipc_ns->mq_mnt; - struct dentry *root = mnt->mnt_root; + struct vfsmount *mnt = mq_internal_mount(); + struct dentry *root; struct filename *name; struct path path; int fd, error; int ro; + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + audit_mq_open(oflag, mode, attr); if (IS_ERR(name = getname(u_name))) @@ -760,6 +790,7 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, goto out_putname; ro = mnt_want_write(mnt); /* we'll drop it in any case */ + root = mnt->mnt_root; inode_lock(d_inode(root)); path.dentry = lookup_one_len(name->name, root, strlen(name->name)); if (IS_ERR(path.dentry)) { @@ -808,6 +839,9 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) 
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; struct vfsmount *mnt = ipc_ns->mq_mnt; + if (!mnt) + return -ENOENT; + name = getname(u_name); if (IS_ERR(name)) return PTR_ERR(name); @@ -1534,28 +1568,26 @@ int mq_init_ns(struct ipc_namespace *ns) ns->mq_msgsize_max = DFLT_MSGSIZEMAX; ns->mq_msg_default = DFLT_MSG; ns->mq_msgsize_default = DFLT_MSGSIZE; + ns->mq_mnt = NULL; - ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns); - if (IS_ERR(ns->mq_mnt)) { - int err = PTR_ERR(ns->mq_mnt); - ns->mq_mnt = NULL; - return err; - } return 0; } void mq_clear_sbinfo(struct ipc_namespace *ns) { - ns->mq_mnt->mnt_sb->s_fs_info = NULL; + if (ns->mq_mnt) + ns->mq_mnt->mnt_sb->s_fs_info = NULL; } void mq_put_mnt(struct ipc_namespace *ns) { - kern_unmount(ns->mq_mnt); + if (ns->mq_mnt) + kern_unmount(ns->mq_mnt); } static int __init init_mqueue_fs(void) { + struct vfsmount *m; int error; mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache", @@ -1577,6 +1609,10 @@ static int __init init_mqueue_fs(void) if (error) goto out_filesystem; + m = kern_mount_data(&mqueue_fs_type, &init_ipc_ns); + if (IS_ERR(m)) + goto out_filesystem; + init_ipc_ns.mq_mnt = m; return 0; out_filesystem: