Skip to content

Commit

Permalink
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel…
Browse files Browse the repository at this point in the history
…/git/ebiederm/user-namespace

Pull namespace changes from Eric Biederman:
 "This is an assorted mishmash of small cleanups, enhancements and bug
  fixes.

  The major theme is user namespace mount restrictions.  nsown_capable
  is killed as it encourages not thinking about details that need to be
  considered.  A very hard-to-hit pid namespace exit bug was finally
  tracked down and fixed.  There are also a couple of cleanups to the
  basic namespace infrastructure.

  Finally there is an enhancement that makes per user namespace
  capabilities usable as capabilities, and an enhancement that allows
  the per userns root to nice other processes in the user namespace"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  userns:  Kill nsown_capable it makes the wrong thing easy
  capabilities: allow nice if we are privileged
  pidns: Don't have unshare(CLONE_NEWPID) imply CLONE_THREAD
  userns: Allow PR_CAPBSET_DROP in a user namespace.
  namespaces: Simplify copy_namespaces so it is clear what is going on.
  pidns: Fix hang in zap_pid_ns_processes by sending a potentially extra wakeup
  sysfs: Restrict mounting sysfs
  userns: Better restrictions on when proc and sysfs can be mounted
  vfs: Don't copy mount bind mounts of /proc/<pid>/ns/mnt between namespaces
  kernel/nsproxy.c: Improving a snippet of code.
  proc: Restrict mounting the proc filesystem
  vfs: Lock in place mounts from more privileged users
  • Loading branch information
torvalds committed Sep 7, 2013
2 parents 11c7b03 + c7b96ac commit c7c4591
Show file tree
Hide file tree
Showing 27 changed files with 177 additions and 104 deletions.
121 changes: 95 additions & 26 deletions fs/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;

/* Don't allow unprivileged users to reveal what is under a mount */
if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
mnt->mnt.mnt_flags |= MNT_LOCKED;

atomic_inc(&sb->s_active);
mnt->mnt.mnt_sb = sb;
mnt->mnt.mnt_root = dget(root);
Expand Down Expand Up @@ -1327,6 +1331,8 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
goto dput_and_out;
if (!check_mnt(mnt))
goto dput_and_out;
if (mnt->mnt.mnt_flags & MNT_LOCKED)
goto dput_and_out;

retval = do_umount(mnt, flags);
dput_and_out:
Expand All @@ -1349,14 +1355,11 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)

#endif

static bool mnt_ns_loop(struct path *path)
static bool is_mnt_ns_file(struct dentry *dentry)
{
/* Could bind mounting the mount namespace inode cause a
* mount namespace loop?
*/
struct inode *inode = path->dentry->d_inode;
/* Is this a proxy for a mount namespace? */
struct inode *inode = dentry->d_inode;
struct proc_ns *ei;
struct mnt_namespace *mnt_ns;

if (!proc_ns_inode(inode))
return false;
Expand All @@ -1365,7 +1368,19 @@ static bool mnt_ns_loop(struct path *path)
if (ei->ns_ops != &mntns_operations)
return false;

mnt_ns = ei->ns;
return true;
}

static bool mnt_ns_loop(struct dentry *dentry)
{
	/*
	 * Would bind mounting this dentry risk a mount namespace loop?
	 *
	 * Only dentries recognised by is_mnt_ns_file() are considered.
	 * For those, the answer is yes when the referenced namespace's
	 * sequence number is not greater than that of the caller's own
	 * mount namespace.
	 */
	struct mnt_namespace *target_ns;

	if (!is_mnt_ns_file(dentry))
		return false;

	target_ns = get_proc_ns(dentry->d_inode)->ns;
	return target_ns->seq <= current->nsproxy->mnt_ns->seq;
}

Expand All @@ -1374,13 +1389,17 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
{
struct mount *res, *p, *q, *r, *parent;

if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
return ERR_PTR(-EINVAL);

if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
return ERR_PTR(-EINVAL);

res = q = clone_mnt(mnt, dentry, flag);
if (IS_ERR(q))
return q;

q->mnt.mnt_flags &= ~MNT_LOCKED;
q->mnt_mountpoint = mnt->mnt_mountpoint;

p = mnt;
Expand All @@ -1390,7 +1409,13 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
continue;

for (s = r; s; s = next_mnt(s, r)) {
if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
if (!(flag & CL_COPY_UNBINDABLE) &&
IS_MNT_UNBINDABLE(s)) {
s = skip_mnt_tree(s);
continue;
}
if (!(flag & CL_COPY_MNT_NS_FILE) &&
is_mnt_ns_file(s->mnt.mnt_root)) {
s = skip_mnt_tree(s);
continue;
}
Expand Down Expand Up @@ -1696,6 +1721,19 @@ static int do_change_type(struct path *path, int flag)
return err;
}

/*
 * Scan the mounts linked on @mnt->mnt_mounts and report whether any of
 * them both satisfies is_subdir(mountpoint, @dentry) and has MNT_LOCKED
 * set in its mount flags.
 *
 * Returns true on the first such child, false if none is found.
 */
static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
	struct mount *child;

	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (is_subdir(child->mnt_mountpoint, dentry) &&
		    (child->mnt.mnt_flags & MNT_LOCKED))
			return true;
	}

	return false;
}

/*
* do loopback mount.
*/
Expand All @@ -1713,7 +1751,7 @@ static int do_loopback(struct path *path, const char *old_name,
return err;

err = -EINVAL;
if (mnt_ns_loop(&old_path))
if (mnt_ns_loop(old_path.dentry))
goto out;

mp = lock_mount(path);
Expand All @@ -1731,8 +1769,11 @@ static int do_loopback(struct path *path, const char *old_name,
if (!check_mnt(parent) || !check_mnt(old))
goto out2;

if (!recurse && has_locked_children(old, old_path.dentry))
goto out2;

if (recurse)
mnt = copy_tree(old, old_path.dentry, 0);
mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
else
mnt = clone_mnt(old, old_path.dentry, 0);

Expand All @@ -1741,6 +1782,8 @@ static int do_loopback(struct path *path, const char *old_name,
goto out2;
}

mnt->mnt.mnt_flags &= ~MNT_LOCKED;

err = graft_tree(mnt, parent, mp);
if (err) {
br_write_lock(&vfsmount_lock);
Expand Down Expand Up @@ -1853,6 +1896,9 @@ static int do_move_mount(struct path *path, const char *old_name)
if (!check_mnt(p) || !check_mnt(old))
goto out1;

if (old->mnt.mnt_flags & MNT_LOCKED)
goto out1;

err = -EINVAL;
if (old_path.dentry != old_path.mnt->mnt_root)
goto out1;
Expand Down Expand Up @@ -2389,7 +2435,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,

namespace_lock();
/* First pass: copy the tree topology */
copy_flags = CL_COPY_ALL | CL_EXPIRE;
copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
if (user_ns != mnt_ns->user_ns)
copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
Expand Down Expand Up @@ -2424,6 +2470,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
}
p = next_mnt(p, old);
q = next_mnt(q, new);
if (!q)
break;
while (p->mnt.mnt_root != q->mnt.mnt_root)
p = next_mnt(p, old);
}
namespace_unlock();

Expand Down Expand Up @@ -2630,6 +2680,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
goto out4;
if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
goto out4;
if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
goto out4;
error = -ENOENT;
if (d_unlinked(new.dentry))
goto out4;
Expand All @@ -2653,6 +2705,10 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
br_write_lock(&vfsmount_lock);
detach_mnt(new_mnt, &parent_path);
detach_mnt(root_mnt, &root_parent);
if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
new_mnt->mnt.mnt_flags |= MNT_LOCKED;
root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
}
/* mount old root on put_old */
attach_mnt(root_mnt, old_mnt, old_mp);
/* mount new_root on / */
Expand Down Expand Up @@ -2811,25 +2867,38 @@ bool current_chrooted(void)
return chrooted;
}

void update_mnt_policy(struct user_namespace *userns)
bool fs_fully_visible(struct file_system_type *type)
{
struct mnt_namespace *ns = current->nsproxy->mnt_ns;
struct mount *mnt;
bool visible = false;

down_read(&namespace_sem);
if (unlikely(!ns))
return false;

namespace_lock();
list_for_each_entry(mnt, &ns->list, mnt_list) {
switch (mnt->mnt.mnt_sb->s_magic) {
case SYSFS_MAGIC:
userns->may_mount_sysfs = true;
break;
case PROC_SUPER_MAGIC:
userns->may_mount_proc = true;
break;
struct mount *child;
if (mnt->mnt.mnt_sb->s_type != type)
continue;

/* This mount is not fully visible if there are any child mounts
* that cover anything except for empty directories.
*/
list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
struct inode *inode = child->mnt_mountpoint->d_inode;
if (!S_ISDIR(inode->i_mode))
goto next;
if (inode->i_nlink != 2)
goto next;
}
if (userns->may_mount_sysfs && userns->may_mount_proc)
break;
visible = true;
goto found;
next: ;
}
up_read(&namespace_sem);
found:
namespace_unlock();
return visible;
}

static void *mntns_get(struct task_struct *task)
Expand Down Expand Up @@ -2860,8 +2929,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
struct path root;

if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
!nsown_capable(CAP_SYS_CHROOT) ||
!nsown_capable(CAP_SYS_ADMIN))
!ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;

if (fs->users != 1)
Expand Down
2 changes: 1 addition & 1 deletion fs/open.c
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
goto dput_and_out;

error = -EPERM;
if (!nsown_capable(CAP_SYS_CHROOT))
if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT))
goto dput_and_out;
error = security_path_chroot(&path);
if (error)
Expand Down
5 changes: 4 additions & 1 deletion fs/pnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@

#define CL_EXPIRE 0x01
#define CL_SLAVE 0x02
#define CL_COPY_ALL 0x04
#define CL_COPY_UNBINDABLE 0x04
#define CL_MAKE_SHARED 0x08
#define CL_PRIVATE 0x10
#define CL_SHARED_TO_SLAVE 0x20
#define CL_UNPRIVILEGED 0x40
#define CL_COPY_MNT_NS_FILE 0x80

#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE)

static inline void set_mnt_shared(struct mount *mnt)
{
Expand Down
6 changes: 5 additions & 1 deletion fs/proc/root.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,11 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
ns = task_active_pid_ns(current);
options = data;

if (!current_user_ns()->may_mount_proc)
if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
return ERR_PTR(-EPERM);

/* Does the mounter have privilege over the pid namespace? */
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
}

Expand Down
11 changes: 9 additions & 2 deletions fs/sysfs/mount.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,15 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
struct super_block *sb;
int error;

if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
return ERR_PTR(-EPERM);
if (!(flags & MS_KERNMOUNT)) {
if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
return ERR_PTR(-EPERM);

for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
if (!kobj_ns_current_may_mount(type))
return ERR_PTR(-EPERM);
}
}

info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
Expand Down
1 change: 0 additions & 1 deletion include/linux/capability.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,6 @@ extern bool has_ns_capability_noaudit(struct task_struct *t,
struct user_namespace *ns, int cap);
extern bool capable(int cap);
extern bool ns_capable(struct user_namespace *ns, int cap);
extern bool nsown_capable(int cap);
extern bool inode_capable(const struct inode *inode, int cap);
extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);

Expand Down
1 change: 1 addition & 0 deletions include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1900,6 +1900,7 @@ extern int vfs_ustat(dev_t, struct kstatfs *);
extern int freeze_super(struct super_block *super);
extern int thaw_super(struct super_block *super);
extern bool our_mnt(struct vfsmount *mnt);
extern bool fs_fully_visible(struct file_system_type *);

extern int current_umask(void);

Expand Down
2 changes: 2 additions & 0 deletions include/linux/kobject_ns.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ enum kobj_ns_type {
*/
struct kobj_ns_type_operations {
enum kobj_ns_type type;
bool (*current_may_mount)(void);
void *(*grab_current_ns)(void);
const void *(*netlink_ns)(struct sock *sk);
const void *(*initial_ns)(void);
Expand All @@ -50,6 +51,7 @@ int kobj_ns_type_registered(enum kobj_ns_type type);
const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent);
const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj);

bool kobj_ns_current_may_mount(enum kobj_ns_type type);
void *kobj_ns_grab_current(enum kobj_ns_type type);
const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk);
const void *kobj_ns_initial(enum kobj_ns_type type);
Expand Down
1 change: 1 addition & 0 deletions include/linux/mount.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ struct mnt_namespace;
#define MNT_INTERNAL 0x4000

#define MNT_LOCK_READONLY 0x400000
#define MNT_LOCKED 0x800000

struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */
Expand Down
4 changes: 0 additions & 4 deletions include/linux/user_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ struct user_namespace {
kuid_t owner;
kgid_t group;
unsigned int proc_inum;
bool may_mount_sysfs;
bool may_mount_proc;
};

extern struct user_namespace init_user_ns;
Expand Down Expand Up @@ -85,6 +83,4 @@ static inline void put_user_ns(struct user_namespace *ns)

#endif

void update_mnt_policy(struct user_namespace *userns);

#endif /* _LINUX_USER_H */
2 changes: 1 addition & 1 deletion ipc/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ static int ipcns_install(struct nsproxy *nsproxy, void *new)
{
struct ipc_namespace *ns = new;
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
!nsown_capable(CAP_SYS_ADMIN))
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;

/* Ditch state from the old ipc namespace */
Expand Down
12 changes: 0 additions & 12 deletions kernel/capability.c
Original file line number Diff line number Diff line change
Expand Up @@ -432,18 +432,6 @@ bool capable(int cap)
}
EXPORT_SYMBOL(capable);

/**
 * nsown_capable - Check a capability against the caller's own user namespace
 * @cap: The capability to test
 *
 * Thin wrapper around ns_capable(): the namespace argument is always
 * current_user_ns(), so the check is made against the user namespace of
 * the current task rather than one chosen by the caller.
 *
 * Return: whatever ns_capable(current_user_ns(), @cap) returns, i.e. true
 * if the current task holds @cap with respect to its own user namespace.
 */
bool nsown_capable(int cap)
{
return ns_capable(current_user_ns(), cap);
}

/**
* inode_capable - Check superior capability over inode
* @inode: The inode in question
Expand Down
Loading

0 comments on commit c7c4591

Please sign in to comment.