Skip to content

Commit

Permalink
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel…
Browse files Browse the repository at this point in the history
…/git/ebiederm/user-namespace

Pull user namespace related fixes from Eric Biederman:
 "As these are bug fixes almost all of thes changes are marked for
  backporting to stable.

  The first change (implicitly adding MNT_NODEV on remount) addresses a
  regression that was created when security issues with unprivileged
  remount were closed.  I go on to update the remount test to make it
  easy to detect if this issue reoccurs.

  Then there are a handful of mount and umount related fixes.

  Then half of the changes deal with the a recently discovered design
  bug in the permission checks of gid_map.  Unix since the beginning has
  allowed setting group permissions on files to less than the user and
  other permissions (aka ---rwx---rwx).  As the unix permission checks
  stop as soon as a group matches, and setgroups allows setting groups
  that can not later be dropped, results in a situtation where it is
  possible to legitimately use a group to assign fewer privileges to a
  process.  Which means dropping a group can increase a processes
  privileges.

  The fix I have adopted is that gid_map is now no longer writable
  without privilege unless the new file /proc/self/setgroups has been
  set to permanently disable setgroups.

  The bulk of user namespace using applications even the applications
  using applications using user namespaces without privilege remain
  unaffected by this change.  Unfortunately this ix breaks a couple user
  space applications, that were relying on the problematic behavior (one
  of which was tools/selftests/mount/unprivileged-remount-test.c).

  To hopefully prevent needing a regression fix on top of my security
  fix I rounded folks who work with the container implementations mostly
  like to be affected and encouraged them to test the changes.

    > So far nothing broke on my libvirt-lxc test bed. :-)
    > Tested with openSUSE 13.2 and libvirt 1.2.9.
    > Tested-by: Richard Weinberger <[email protected]>

    > Tested on Fedora20 with libvirt 1.2.11, works fine.
    > Tested-by: Chen Hanxiao <[email protected]>

    > Ok, thanks - yes, unprivileged lxc is working fine with your kernels.
    > Just to be sure I was testing the right thing I also tested using
    > my unprivileged nsexec testcases, and they failed on setgroup/setgid
    > as now expected, and succeeded there without your patches.
    > Tested-by: Serge Hallyn <[email protected]>

    > I tested this with Sandstorm.  It breaks as is and it works if I add
    > the setgroups thing.
    > Tested-by: Andy Lutomirski <[email protected]> # breaks things as designed :("

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  userns: Unbreak the unprivileged remount tests
  userns; Correct the comment in map_write
  userns: Allow setting gid_maps without privilege when setgroups is disabled
  userns: Add a knob to disable setgroups on a per user namespace basis
  userns: Rename id_map_mutex to userns_state_mutex
  userns: Only allow the creator of the userns unprivileged mappings
  userns: Check euid no fsuid when establishing an unprivileged uid mapping
  userns: Don't allow unprivileged creation of gid mappings
  userns: Don't allow setgroups until a gid mapping has been setablished
  userns: Document what the invariant required for safe unprivileged mappings.
  groups: Consolidate the setgroups permission checks
  mnt: Clear mnt_expire during pivot_root
  mnt: Carefully set CL_UNPRIVILEGED in clone_mnt
  mnt: Move the clear of MNT_LOCKED from copy_tree to it's callers.
  umount: Do not allow unmounting rootfs.
  umount: Disallow unprivileged mount force
  mnt: Update unprivileged remount test
  mnt: Implicitly add MNT_NODEV on remount when it was implicitly added by mount
  • Loading branch information
torvalds committed Dec 17, 2014
2 parents f045bbb + db86da7 commit 87c31b3
Show file tree
Hide file tree
Showing 11 changed files with 374 additions and 55 deletions.
2 changes: 1 addition & 1 deletion arch/s390/kernel/compat_linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
struct group_info *group_info;
int retval;

if (!capable(CAP_SETGID))
if (!may_setgroups())
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
Expand Down
18 changes: 15 additions & 3 deletions fs/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -963,7 +963,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
}

/* Don't allow unprivileged users to reveal what is under a mount */
if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
if ((flag & CL_UNPRIVILEGED) &&
(!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
mnt->mnt.mnt_flags |= MNT_LOCKED;

atomic_inc(&sb->s_active);
Expand Down Expand Up @@ -1544,6 +1545,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
goto dput_and_out;
if (mnt->mnt.mnt_flags & MNT_LOCKED)
goto dput_and_out;
retval = -EPERM;
if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
goto dput_and_out;

retval = do_umount(mnt, flags);
dput_and_out:
Expand Down Expand Up @@ -1606,7 +1610,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
if (IS_ERR(q))
return q;

q->mnt.mnt_flags &= ~MNT_LOCKED;
q->mnt_mountpoint = mnt->mnt_mountpoint;

p = mnt;
Expand Down Expand Up @@ -2097,7 +2100,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
!(mnt_flags & MNT_NODEV)) {
return -EPERM;
/* Was the nodev implicitly added in mount? */
if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
!(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
mnt_flags |= MNT_NODEV;
} else {
return -EPERM;
}
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
!(mnt_flags & MNT_NOSUID)) {
Expand Down Expand Up @@ -2958,6 +2967,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* mount new_root on / */
attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
touch_mnt_namespace(current->nsproxy->mnt_ns);
/* A moved mount should not expire automatically */
list_del_init(&new_mnt->mnt_expire);
unlock_mount_hash();
chroot_fs_refs(&root, &new);
put_mountpoint(root_mp);
Expand Down Expand Up @@ -3002,6 +3013,7 @@ static void __init init_mount_tree(void)

root.mnt = mnt;
root.dentry = mnt->mnt_root;
mnt->mnt_flags |= MNT_LOCKED;

set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
Expand Down
1 change: 1 addition & 0 deletions fs/pnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ static int propagate_one(struct mount *m)
child = copy_tree(last_source, last_source->mnt.mnt_root, type);
if (IS_ERR(child))
return PTR_ERR(child);
child->mnt.mnt_flags &= ~MNT_LOCKED;
mnt_set_mountpoint(m, mp, child);
last_dest = m;
last_source = child;
Expand Down
53 changes: 53 additions & 0 deletions fs/proc/base.c
Original file line number Diff line number Diff line change
Expand Up @@ -2464,6 +2464,57 @@ static const struct file_operations proc_projid_map_operations = {
.llseek = seq_lseek,
.release = proc_id_map_release,
};

static int proc_setgroups_open(struct inode *inode, struct file *file)
{
struct user_namespace *ns = NULL;
struct task_struct *task;
int ret;

ret = -ESRCH;
task = get_proc_task(inode);
if (task) {
rcu_read_lock();
ns = get_user_ns(task_cred_xxx(task, user_ns));
rcu_read_unlock();
put_task_struct(task);
}
if (!ns)
goto err;

if (file->f_mode & FMODE_WRITE) {
ret = -EACCES;
if (!ns_capable(ns, CAP_SYS_ADMIN))
goto err_put_ns;
}

ret = single_open(file, &proc_setgroups_show, ns);
if (ret)
goto err_put_ns;

return 0;
err_put_ns:
put_user_ns(ns);
err:
return ret;
}

static int proc_setgroups_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
struct user_namespace *ns = seq->private;
int ret = single_release(inode, file);
put_user_ns(ns);
return ret;
}

static const struct file_operations proc_setgroups_operations = {
.open = proc_setgroups_open,
.write = proc_setgroups_write,
.read = seq_read,
.llseek = seq_lseek,
.release = proc_setgroups_release,
};
#endif /* CONFIG_USER_NS */

static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
Expand Down Expand Up @@ -2572,6 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
#ifdef CONFIG_CHECKPOINT_RESTORE
REG("timers", S_IRUGO, proc_timers_operations),
Expand Down Expand Up @@ -2916,6 +2968,7 @@ static const struct pid_entry tid_base_stuff[] = {
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
};

Expand Down
1 change: 1 addition & 0 deletions include/linux/cred.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ extern void groups_free(struct group_info *);
extern int set_current_groups(struct group_info *);
extern void set_groups(struct cred *, struct group_info *);
extern int groups_search(const struct group_info *, kgid_t);
extern bool may_setgroups(void);

/* access the groups "array" with this macro */
#define GROUP_AT(gi, i) \
Expand Down
12 changes: 12 additions & 0 deletions include/linux/user_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */
} extent[UID_GID_MAP_MAX_EXTENTS];
};

#define USERNS_SETGROUPS_ALLOWED 1UL

#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED

struct user_namespace {
struct uid_gid_map uid_map;
struct uid_gid_map gid_map;
Expand All @@ -28,6 +32,7 @@ struct user_namespace {
kuid_t owner;
kgid_t group;
struct ns_common ns;
unsigned long flags;

/* Register of per-UID persistent keyrings for this namespace */
#ifdef CONFIG_PERSISTENT_KEYRINGS
Expand Down Expand Up @@ -64,6 +69,9 @@ extern const struct seq_operations proc_projid_seq_operations;
extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
extern int proc_setgroups_show(struct seq_file *m, void *v);
extern bool userns_may_setgroups(const struct user_namespace *ns);
#else

static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
Expand All @@ -88,6 +96,10 @@ static inline void put_user_ns(struct user_namespace *ns)
{
}

static inline bool userns_may_setgroups(const struct user_namespace *ns)
{
return true;
}
#endif

#endif /* _LINUX_USER_H */
11 changes: 10 additions & 1 deletion kernel/groups.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/user_namespace.h>
#include <asm/uaccess.h>

/* init to 2 - one for init_task, one to ensure it is never freed */
Expand Down Expand Up @@ -213,6 +214,14 @@ SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
return i;
}

bool may_setgroups(void)
{
struct user_namespace *user_ns = current_user_ns();

return ns_capable(user_ns, CAP_SETGID) &&
userns_may_setgroups(user_ns);
}

/*
* SMP: Our groups are copy-on-write. We can set them safely
* without another task interfering.
Expand All @@ -223,7 +232,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
struct group_info *group_info;
int retval;

if (!ns_capable(current_user_ns(), CAP_SETGID))
if (!may_setgroups())
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
Expand Down
2 changes: 1 addition & 1 deletion kernel/uid16.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
struct group_info *group_info;
int retval;

if (!ns_capable(current_user_ns(), CAP_SETGID))
if (!may_setgroups())
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
Expand Down
1 change: 1 addition & 0 deletions kernel/user.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ struct user_namespace init_user_ns = {
#ifdef CONFIG_USER_NS
.ns.ops = &userns_operations,
#endif
.flags = USERNS_INIT_FLAGS,
#ifdef CONFIG_PERSISTENT_KEYRINGS
.persistent_keyring_register_sem =
__RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem),
Expand Down
Loading

0 comments on commit 87c31b3

Please sign in to comment.