Skip to content

Commit

Permalink
Merge branch 'nsfs-ioctls' into HEAD
Browse files Browse the repository at this point in the history
From: Andrey Vagin <[email protected]>

Each namespace has an owning user namespace, but currently there is no
way to discover these relationships.

Pid and user namespaces are hierarchical. There is no way to discover
parent-child relationships either.

Why might we want to know the relationships between namespaces?

One use would be visualization, in order to understand the running
system.  Another would be to answer the question: what capability does
process X have to perform operations on a resource governed by namespace
Y?

One more use-case (which is usually called abnormal) is checkpoint/restart.
In CRIU we are going to dump and restore nested namespaces.

There [1] was a discussion about which interface to choose to determine
relationships between namespaces.

Eric suggested to add two ioctl-s [2]:
> Grumble, Grumble.  I think this may actually a case for creating ioctls
> for these two cases.  Now that random nsfs file descriptors are bind
> mountable the original reason for using proc files is not as pressing.
>
> One ioctl for the user namespace that owns a file descriptor.
> One ioctl for the parent namespace of a namespace file descriptor.

Here is an implementation of these ioctl-s.

$ man man7/namespaces.7
...
Since  Linux  4.X,  the  following  ioctl(2)  calls are supported for
namespace file descriptors.  The correct syntax is:

      fd = ioctl(ns_fd, ioctl_type);

where ioctl_type is one of the following:

NS_GET_USERNS
      Returns a file descriptor that refers to an owning user names‐
      pace.

NS_GET_PARENT
      Returns  a  file descriptor that refers to a parent namespace.
      This ioctl(2) can be used for pid  and  user  namespaces.  For
      user namespaces, NS_GET_PARENT and NS_GET_USERNS have the same
      meaning.

In addition to generic ioctl(2) errors, the following  specific  ones
can occur:

EINVAL NS_GET_PARENT was called for a nonhierarchical namespace.

EPERM  The  requested  namespace  is outside of the current namespace
      scope.

[1] https://lkml.org/lkml/2016/7/6/158
[2] https://lkml.org/lkml/2016/7/9/101

Changes for v2:
* don't return ENOENT for init_user_ns and init_pid_ns. There is nothing
  outside of the init namespace, so we can return EPERM in this case too.
  > The fewer special cases the easier the code is to get
  > correct, and the easier it is to read. // Eric

Changes for v3:
* rename ns->get_owner() to ns->owner(). get_* usually means that it
  grabs a reference.

Cc: "Eric W. Biederman" <[email protected]>
Cc: James Bottomley <[email protected]>
Cc: "Michael Kerrisk (man-pages)" <[email protected]>
Cc: "W. Trevor King" <[email protected]>
Cc: Alexander Viro <[email protected]>
Cc: Serge Hallyn <[email protected]>
  • Loading branch information
ebiederm committed Sep 23, 2016
2 parents 93f0a88 + 6ad92bf commit 7872559
Show file tree
Hide file tree
Showing 15 changed files with 371 additions and 13 deletions.
6 changes: 6 additions & 0 deletions fs/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -3368,10 +3368,16 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return 0;
}

/*
 * ->owner() hook for mount namespaces.
 *
 * Converts @ns with to_mnt_ns() and hands back the user_ns pointer stored
 * there.  The pointer is only read; no reference is taken here.
 */
static struct user_namespace *mntns_owner(struct ns_common *ns)
{
	struct user_namespace *owner = to_mnt_ns(ns)->user_ns;

	return owner;
}

/*
 * proc_ns_operations table for the "mnt" namespace type (CLONE_NEWNS).
 *
 * .owner is wired to mntns_owner(), which returns the user_ns recorded in
 * the mount namespace.  No .get_parent hook is set in this table.
 */
const struct proc_ns_operations mntns_operations = {
.name = "mnt",
.type = CLONE_NEWNS,
.get = mntns_get,
.put = mntns_put,
.install = mntns_install,
.owner = mntns_owner,
};
100 changes: 87 additions & 13 deletions fs/nsfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,16 @@
#include <linux/magic.h>
#include <linux/ktime.h>
#include <linux/seq_file.h>
#include <linux/user_namespace.h>
#include <linux/nsfs.h>

static struct vfsmount *nsfs_mnt;

/*
 * Forward declaration: ns_ioctl() is defined further down in this file but
 * is referenced by the file_operations table right below.
 */
static long ns_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg);
/*
 * File operations table of this file: llseek is handled by no_llseek and
 * unlocked_ioctl is routed to ns_ioctl(), which implements the
 * NS_GET_USERNS and NS_GET_PARENT requests.
 */
static const struct file_operations ns_file_operations = {
.llseek = no_llseek,
.unlocked_ioctl = ns_ioctl,
};

static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
Expand Down Expand Up @@ -44,22 +49,14 @@ static void nsfs_evict(struct inode *inode)
ns->ops->put(ns);
}

void *ns_get_path(struct path *path, struct task_struct *task,
const struct proc_ns_operations *ns_ops)
static void *__ns_get_path(struct path *path, struct ns_common *ns)
{
struct vfsmount *mnt = mntget(nsfs_mnt);
struct qstr qname = { .name = "", };
struct dentry *dentry;
struct inode *inode;
struct ns_common *ns;
unsigned long d;

again:
ns = ns_ops->get(task);
if (!ns) {
mntput(mnt);
return ERR_PTR(-ENOENT);
}
rcu_read_lock();
d = atomic_long_read(&ns->stashed);
if (!d)
Expand All @@ -68,7 +65,7 @@ void *ns_get_path(struct path *path, struct task_struct *task,
if (!lockref_get_not_dead(&dentry->d_lockref))
goto slow;
rcu_read_unlock();
ns_ops->put(ns);
ns->ops->put(ns);
got_it:
path->mnt = mnt;
path->dentry = dentry;
Expand All @@ -77,7 +74,7 @@ void *ns_get_path(struct path *path, struct task_struct *task,
rcu_read_unlock();
inode = new_inode_pseudo(mnt->mnt_sb);
if (!inode) {
ns_ops->put(ns);
ns->ops->put(ns);
mntput(mnt);
return ERR_PTR(-ENOMEM);
}
Expand All @@ -95,17 +92,94 @@ void *ns_get_path(struct path *path, struct task_struct *task,
return ERR_PTR(-ENOMEM);
}
d_instantiate(dentry, inode);
dentry->d_fsdata = (void *)ns_ops;
dentry->d_fsdata = (void *)ns->ops;
d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
if (d) {
d_delete(dentry); /* make sure ->d_prune() does nothing */
dput(dentry);
mntput(mnt);
cpu_relax();
goto again;
return ERR_PTR(-EAGAIN);
}
goto got_it;
}

/*
 * Look up one of @task's namespaces and resolve it to a path.
 *
 * @path:   passed through to __ns_get_path(), which fills it in
 * @task:   task whose namespace is fetched through @ns_ops->get()
 * @ns_ops: operations of the namespace type being looked up
 *
 * Returns ERR_PTR(-ENOENT) when ->get() yields no namespace.  Otherwise the
 * result of __ns_get_path() is returned, except that -EAGAIN is never
 * propagated: the namespace is fetched again through ->get() and the lookup
 * is retried.
 */
void *ns_get_path(struct path *path, struct task_struct *task,
		  const struct proc_ns_operations *ns_ops)
{
	void *res;

	do {
		struct ns_common *ns = ns_ops->get(task);

		if (!ns)
			return ERR_PTR(-ENOENT);

		res = __ns_get_path(path, ns);
	} while (res == ERR_PTR(-EAGAIN));

	return res;
}

/*
 * Open a namespace related to @ns and return a new file descriptor for it.
 *
 * @ns:     namespace the request was issued on
 * @get_ns: callback producing the related namespace (or an ERR_PTR)
 *
 * A descriptor is reserved up front with O_CLOEXEC.  The related namespace
 * is then obtained through @get_ns and resolved with __ns_get_path(); an
 * -EAGAIN result restarts that step, calling @get_ns again.  On success the
 * path is opened O_RDONLY with the caller's credentials and installed into
 * the reserved descriptor.
 *
 * Returns the new descriptor, or a negative errno.  Every failure after the
 * reservation releases the descriptor with put_unused_fd().
 */
static int open_related_ns(struct ns_common *ns,
			   struct ns_common *(*get_ns)(struct ns_common *ns))
{
	struct path path = {};
	struct file *file;
	void *res;
	int ret;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		return fd;

	do {
		struct ns_common *relative = get_ns(ns);

		if (IS_ERR(relative)) {
			ret = PTR_ERR(relative);
			goto out_put_fd;
		}

		res = __ns_get_path(&path, relative);
	} while (res == ERR_PTR(-EAGAIN));

	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		goto out_put_fd;
	}

	file = dentry_open(&path, O_RDONLY, current_cred());
	/* The path reference is dropped whether or not the open succeeded. */
	path_put(&path);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_put_fd;
	}

	fd_install(fd, file);
	return fd;

out_put_fd:
	put_unused_fd(fd);
	return ret;
}

/*
 * ioctl handler for namespace file descriptors.
 *
 * NS_GET_USERNS: open the owning user namespace, found via ns_get_owner().
 * NS_GET_PARENT: open the parent namespace through the type's ->get_parent
 *                hook; -EINVAL when the namespace type provides no hook.
 * Any other request yields -ENOTTY.
 *
 * @arg is not used by either request.
 */
static long ns_ioctl(struct file *filp, unsigned int ioctl,
		     unsigned long arg)
{
	struct ns_common *ns = get_proc_ns(file_inode(filp));

	if (ioctl == NS_GET_USERNS)
		return open_related_ns(ns, ns_get_owner);

	if (ioctl == NS_GET_PARENT) {
		if (ns->ops->get_parent)
			return open_related_ns(ns, ns->ops->get_parent);
		return -EINVAL;
	}

	return -ENOTTY;
}

int ns_get_name(char *buf, size_t size, struct task_struct *task,
const struct proc_ns_operations *ns_ops)
{
Expand Down
2 changes: 2 additions & 0 deletions include/linux/proc_ns.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ struct proc_ns_operations {
struct ns_common *(*get)(struct task_struct *task);
void (*put)(struct ns_common *ns);
int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
struct user_namespace *(*owner)(struct ns_common *ns);
struct ns_common *(*get_parent)(struct ns_common *ns);
};

extern const struct proc_ns_operations netns_operations;
Expand Down
7 changes: 7 additions & 0 deletions include/linux/user_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t,
extern int proc_setgroups_show(struct seq_file *m, void *v);
extern bool userns_may_setgroups(const struct user_namespace *ns);
extern bool current_in_userns(const struct user_namespace *target_ns);

struct ns_common *ns_get_owner(struct ns_common *ns);
#else

static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
Expand Down Expand Up @@ -139,6 +141,11 @@ static inline bool current_in_userns(const struct user_namespace *target_ns)
{
return true;
}

/*
 * Stub used in the #else branch of this header: no owner is ever reported,
 * every call fails with ERR_PTR(-EPERM).
 * NOTE(review): presumably the build without user namespace support; the
 * controlling #if is outside this view -- confirm.
 */
static inline struct ns_common *ns_get_owner(struct ns_common *ns)
{
return ERR_PTR(-EPERM);
}
#endif

#endif /* _LINUX_USER_H */
13 changes: 13 additions & 0 deletions include/uapi/linux/nsfs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/*
 * ioctl(2) request codes for namespace file descriptors.
 *
 * Both requests are defined with _IO(), i.e. they carry no argument
 * payload; the result is delivered as the ioctl return value.
 */
#ifndef __LINUX_NSFS_H
#define __LINUX_NSFS_H

#include <linux/ioctl.h>

/* ioctl type byte shared by the NS_* requests below */
#define NSIO 0xb7

/* Returns a file descriptor that refers to an owning user namespace */
#define NS_GET_USERNS _IO(NSIO, 0x1)
/* Returns a file descriptor that refers to a parent namespace */
#define NS_GET_PARENT _IO(NSIO, 0x2)

#endif /* __LINUX_NSFS_H */
6 changes: 6 additions & 0 deletions ipc/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,16 @@ static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
return 0;
}

/*
 * ->owner() hook for IPC namespaces.
 *
 * Converts @ns with to_ipc_ns() and hands back the user_ns pointer stored
 * there.  The pointer is only read; no reference is taken here.
 */
static struct user_namespace *ipcns_owner(struct ns_common *ns)
{
	struct user_namespace *owner = to_ipc_ns(ns)->user_ns;

	return owner;
}

/*
 * proc_ns_operations table for the "ipc" namespace type (CLONE_NEWIPC).
 *
 * .owner is wired to ipcns_owner(), which returns the user_ns recorded in
 * the IPC namespace.  No .get_parent hook is set in this table.
 */
const struct proc_ns_operations ipcns_operations = {
.name = "ipc",
.type = CLONE_NEWIPC,
.get = ipcns_get,
.put = ipcns_put,
.install = ipcns_install,
.owner = ipcns_owner,
};
6 changes: 6 additions & 0 deletions kernel/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -6421,12 +6421,18 @@ static void cgroupns_put(struct ns_common *ns)
put_cgroup_ns(to_cg_ns(ns));
}

/*
 * ->owner() hook for cgroup namespaces.
 *
 * Converts @ns with to_cg_ns() and hands back the user_ns pointer stored
 * there.  The pointer is only read; no reference is taken here.
 */
static struct user_namespace *cgroupns_owner(struct ns_common *ns)
{
	struct user_namespace *owner = to_cg_ns(ns)->user_ns;

	return owner;
}

/*
 * proc_ns_operations table for the "cgroup" namespace type
 * (CLONE_NEWCGROUP).
 *
 * .owner is wired to cgroupns_owner(), which returns the user_ns recorded
 * in the cgroup namespace.  No .get_parent hook is set in this table.
 */
const struct proc_ns_operations cgroupns_operations = {
.name = "cgroup",
.type = CLONE_NEWCGROUP,
.get = cgroupns_get,
.put = cgroupns_put,
.install = cgroupns_install,
.owner = cgroupns_owner,
};

static __init int cgroup_namespaces_init(void)
Expand Down
25 changes: 25 additions & 0 deletions kernel/pid_namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -405,12 +405,37 @@ static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return 0;
}

/*
 * ->get_parent() hook for pid namespaces.
 *
 * Returns the parent of the pid namespace behind @ns, with a reference
 * taken through get_pid_ns(), but only when that parent is the caller's
 * active pid namespace or is reachable from it by following ->parent links
 * upwards from the parent.  If the walk runs off the top of the hierarchy
 * first (including when there is no parent at all), ERR_PTR(-EPERM) is
 * returned.
 */
static struct ns_common *pidns_get_parent(struct ns_common *ns)
{
	struct pid_namespace *active = task_active_pid_ns(current);
	struct pid_namespace *parent = to_pid_ns(ns)->parent;
	struct pid_namespace *ancestor;

	/* See if the parent is in the current namespace */
	for (ancestor = parent; ancestor; ancestor = ancestor->parent) {
		if (ancestor == active)
			return &get_pid_ns(parent)->ns;
	}

	return ERR_PTR(-EPERM);
}

/*
 * ->owner() hook for pid namespaces.
 *
 * Converts @ns with to_pid_ns() and hands back the user_ns pointer stored
 * there.  The pointer is only read; no reference is taken here.
 */
static struct user_namespace *pidns_owner(struct ns_common *ns)
{
	struct user_namespace *owner = to_pid_ns(ns)->user_ns;

	return owner;
}

/*
 * proc_ns_operations table for the "pid" namespace type (CLONE_NEWPID).
 *
 * .owner is wired to pidns_owner(), which returns the user_ns recorded in
 * the pid namespace.  .get_parent is wired to pidns_get_parent(), which
 * returns the parent pid namespace or ERR_PTR(-EPERM).
 */
const struct proc_ns_operations pidns_operations = {
.name = "pid",
.type = CLONE_NEWPID,
.get = pidns_get,
.put = pidns_put,
.install = pidns_install,
.owner = pidns_owner,
.get_parent = pidns_get_parent,
};

static __init int pid_namespaces_init(void)
Expand Down
25 changes: 25 additions & 0 deletions kernel/user_namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -1050,12 +1050,37 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return commit_creds(cred);
}

/*
 * Return the user namespace that owns @ns, as reported by the namespace
 * type's ->owner() hook, with a reference taken through get_user_ns().
 *
 * The owner is only handed out when it is the caller's current user
 * namespace or is reachable from it by following ->parent links upwards
 * from the owner.  If the walk runs off the top of the hierarchy first
 * (including when ->owner() reports no owner), ERR_PTR(-EPERM) is returned.
 */
struct ns_common *ns_get_owner(struct ns_common *ns)
{
	struct user_namespace *my_user_ns = current_user_ns();
	struct user_namespace *owner = ns->ops->owner(ns);
	struct user_namespace *ancestor;

	/* See if the owner is in the current user namespace */
	for (ancestor = owner; ancestor; ancestor = ancestor->parent) {
		if (ancestor == my_user_ns)
			return &get_user_ns(owner)->ns;
	}

	return ERR_PTR(-EPERM);
}

/*
 * ->owner() hook for user namespaces.
 *
 * For a user namespace the owner is its ->parent: @ns is converted with
 * to_user_ns() and the parent pointer is handed back as stored.  The
 * pointer is only read; no reference is taken here.
 */
static struct user_namespace *userns_owner(struct ns_common *ns)
{
	struct user_namespace *owner = to_user_ns(ns)->parent;

	return owner;
}

/*
 * proc_ns_operations table for the "user" namespace type (CLONE_NEWUSER).
 *
 * .owner is wired to userns_owner(), which returns the namespace's
 * ->parent.  .get_parent reuses ns_get_owner(), so for user namespaces the
 * parent lookup and the owner lookup go through the same function.
 */
const struct proc_ns_operations userns_operations = {
.name = "user",
.type = CLONE_NEWUSER,
.get = userns_get,
.put = userns_put,
.install = userns_install,
.owner = userns_owner,
.get_parent = ns_get_owner,
};

static __init int user_namespaces_init(void)
Expand Down
6 changes: 6 additions & 0 deletions kernel/utsname.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,16 @@ static int utsns_install(struct nsproxy *nsproxy, struct ns_common *new)
return 0;
}

/*
 * ->owner() hook for UTS namespaces.
 *
 * Converts @ns with to_uts_ns() and hands back the user_ns pointer stored
 * there.  The pointer is only read; no reference is taken here.
 */
static struct user_namespace *utsns_owner(struct ns_common *ns)
{
	struct user_namespace *owner = to_uts_ns(ns)->user_ns;

	return owner;
}

/*
 * proc_ns_operations table for the "uts" namespace type (CLONE_NEWUTS).
 *
 * .owner is wired to utsns_owner(), which returns the user_ns recorded in
 * the UTS namespace.  No .get_parent hook is set in this table.
 */
const struct proc_ns_operations utsns_operations = {
.name = "uts",
.type = CLONE_NEWUTS,
.get = utsns_get,
.put = utsns_put,
.install = utsns_install,
.owner = utsns_owner,
};
6 changes: 6 additions & 0 deletions net/core/net_namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -1016,11 +1016,17 @@ static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return 0;
}

/*
 * ->owner() hook for network namespaces.
 *
 * Converts @ns with to_net_ns() and hands back the user_ns pointer stored
 * there.  The pointer is only read; no reference is taken here.
 */
static struct user_namespace *netns_owner(struct ns_common *ns)
{
	struct user_namespace *owner = to_net_ns(ns)->user_ns;

	return owner;
}

/*
 * proc_ns_operations table for the "net" namespace type (CLONE_NEWNET).
 *
 * .owner is wired to netns_owner(), which returns the user_ns recorded in
 * the network namespace.  No .get_parent hook is set in this table.
 */
const struct proc_ns_operations netns_operations = {
.name = "net",
.type = CLONE_NEWNET,
.get = netns_get,
.put = netns_put,
.install = netns_install,
.owner = netns_owner,
};
#endif
1 change: 1 addition & 0 deletions tools/testing/selftests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ TARGETS += memory-hotplug
TARGETS += mount
TARGETS += mqueue
TARGETS += net
TARGETS += nsfs
TARGETS += powerpc
TARGETS += pstore
TARGETS += ptrace
Expand Down
12 changes: 12 additions & 0 deletions tools/testing/selftests/nsfs/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Selftests for the nsfs ioctls: builds the "owner" and "pidns" programs.

# Programs listed for the shared selftest rules pulled in from ../lib.mk.
TEST_PROGS := owner pidns

# Treat every compiler warning as a build failure.
CFLAGS := -Wall -Werror

# Each program is built from the single source file of the same name; the
# rules carry no recipe of their own.
all: owner pidns
owner: owner.c
pidns: pidns.c

# Remove the built test programs.
clean:
$(RM) owner pidns

include ../lib.mk
Loading

0 comments on commit 7872559

Please sign in to comment.