Skip to content

Commit 87c31b3

Browse files
committed
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull user namespace related fixes from Eric Biederman: "As these are bug fixes almost all of these changes are marked for backporting to stable. The first change (implicitly adding MNT_NODEV on remount) addresses a regression that was created when security issues with unprivileged remount were closed. I go on to update the remount test to make it easy to detect if this issue reoccurs. Then there are a handful of mount and umount related fixes. Then half of the changes deal with a recently discovered design bug in the permission checks of gid_map. Unix since the beginning has allowed setting group permissions on files to less than the user and other permissions (aka ---rwx---rwx). As the unix permission checks stop as soon as a group matches, and setgroups allows setting groups that can not later be dropped, this results in a situation where it is possible to legitimately use a group to assign fewer privileges to a process. Which means dropping a group can increase a process's privileges. The fix I have adopted is that gid_map is now no longer writable without privilege unless the new file /proc/self/setgroups has been set to permanently disable setgroups. The bulk of user namespace using applications, even the applications using user namespaces without privilege, remain unaffected by this change. Unfortunately this fix breaks a couple of user space applications that were relying on the problematic behavior (one of which was tools/selftests/mount/unprivileged-remount-test.c). To hopefully prevent needing a regression fix on top of my security fix I rounded up folks who work with the container implementations most likely to be affected and encouraged them to test the changes. > So far nothing broke on my libvirt-lxc test bed. :-) > Tested with openSUSE 13.2 and libvirt 1.2.9. > Tested-by: Richard Weinberger <[email protected]> > Tested on Fedora20 with libvirt 1.2.11, works fine. 
> Tested-by: Chen Hanxiao <[email protected]> > Ok, thanks - yes, unprivileged lxc is working fine with your kernels. > Just to be sure I was testing the right thing I also tested using > my unprivileged nsexec testcases, and they failed on setgroup/setgid > as now expected, and succeeded there without your patches. > Tested-by: Serge Hallyn <[email protected]> > I tested this with Sandstorm. It breaks as is and it works if I add > the setgroups thing. > Tested-by: Andy Lutomirski <[email protected]> # breaks things as designed :(" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: userns: Unbreak the unprivileged remount tests userns; Correct the comment in map_write userns: Allow setting gid_maps without privilege when setgroups is disabled userns: Add a knob to disable setgroups on a per user namespace basis userns: Rename id_map_mutex to userns_state_mutex userns: Only allow the creator of the userns unprivileged mappings userns: Check euid no fsuid when establishing an unprivileged uid mapping userns: Don't allow unprivileged creation of gid mappings userns: Don't allow setgroups until a gid mapping has been setablished userns: Document what the invariant required for safe unprivileged mappings. groups: Consolidate the setgroups permission checks mnt: Clear mnt_expire during pivot_root mnt: Carefully set CL_UNPRIVILEGED in clone_mnt mnt: Move the clear of MNT_LOCKED from copy_tree to it's callers. umount: Do not allow unmounting rootfs. umount: Disallow unprivileged mount force mnt: Update unprivileged remount test mnt: Implicitly add MNT_NODEV on remount when it was implicitly added by mount
2 parents f045bbb + db86da7 commit 87c31b3

File tree

11 files changed

+374
-55
lines changed

11 files changed

+374
-55
lines changed

arch/s390/kernel/compat_linux.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
249249
struct group_info *group_info;
250250
int retval;
251251

252-
if (!capable(CAP_SETGID))
252+
if (!may_setgroups())
253253
return -EPERM;
254254
if ((unsigned)gidsetsize > NGROUPS_MAX)
255255
return -EINVAL;

fs/namespace.c

+15-3
Original file line numberDiff line numberDiff line change
@@ -963,7 +963,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
963963
}
964964

965965
/* Don't allow unprivileged users to reveal what is under a mount */
966-
if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
966+
if ((flag & CL_UNPRIVILEGED) &&
967+
(!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
967968
mnt->mnt.mnt_flags |= MNT_LOCKED;
968969

969970
atomic_inc(&sb->s_active);
@@ -1544,6 +1545,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
15441545
goto dput_and_out;
15451546
if (mnt->mnt.mnt_flags & MNT_LOCKED)
15461547
goto dput_and_out;
1548+
retval = -EPERM;
1549+
if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
1550+
goto dput_and_out;
15471551

15481552
retval = do_umount(mnt, flags);
15491553
dput_and_out:
@@ -1606,7 +1610,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
16061610
if (IS_ERR(q))
16071611
return q;
16081612

1609-
q->mnt.mnt_flags &= ~MNT_LOCKED;
16101613
q->mnt_mountpoint = mnt->mnt_mountpoint;
16111614

16121615
p = mnt;
@@ -2097,7 +2100,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
20972100
}
20982101
if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
20992102
!(mnt_flags & MNT_NODEV)) {
2100-
return -EPERM;
2103+
/* Was the nodev implicitly added in mount? */
2104+
if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
2105+
!(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
2106+
mnt_flags |= MNT_NODEV;
2107+
} else {
2108+
return -EPERM;
2109+
}
21012110
}
21022111
if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
21032112
!(mnt_flags & MNT_NOSUID)) {
@@ -2958,6 +2967,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
29582967
/* mount new_root on / */
29592968
attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
29602969
touch_mnt_namespace(current->nsproxy->mnt_ns);
2970+
/* A moved mount should not expire automatically */
2971+
list_del_init(&new_mnt->mnt_expire);
29612972
unlock_mount_hash();
29622973
chroot_fs_refs(&root, &new);
29632974
put_mountpoint(root_mp);
@@ -3002,6 +3013,7 @@ static void __init init_mount_tree(void)
30023013

30033014
root.mnt = mnt;
30043015
root.dentry = mnt->mnt_root;
3016+
mnt->mnt_flags |= MNT_LOCKED;
30053017

30063018
set_fs_pwd(current->fs, &root);
30073019
set_fs_root(current->fs, &root);

fs/pnode.c

+1
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ static int propagate_one(struct mount *m)
242242
child = copy_tree(last_source, last_source->mnt.mnt_root, type);
243243
if (IS_ERR(child))
244244
return PTR_ERR(child);
245+
child->mnt.mnt_flags &= ~MNT_LOCKED;
245246
mnt_set_mountpoint(m, mp, child);
246247
last_dest = m;
247248
last_source = child;

fs/proc/base.c

+53
Original file line numberDiff line numberDiff line change
@@ -2464,6 +2464,57 @@ static const struct file_operations proc_projid_map_operations = {
24642464
.llseek = seq_lseek,
24652465
.release = proc_id_map_release,
24662466
};
2467+
2468+
static int proc_setgroups_open(struct inode *inode, struct file *file)
2469+
{
2470+
struct user_namespace *ns = NULL;
2471+
struct task_struct *task;
2472+
int ret;
2473+
2474+
ret = -ESRCH;
2475+
task = get_proc_task(inode);
2476+
if (task) {
2477+
rcu_read_lock();
2478+
ns = get_user_ns(task_cred_xxx(task, user_ns));
2479+
rcu_read_unlock();
2480+
put_task_struct(task);
2481+
}
2482+
if (!ns)
2483+
goto err;
2484+
2485+
if (file->f_mode & FMODE_WRITE) {
2486+
ret = -EACCES;
2487+
if (!ns_capable(ns, CAP_SYS_ADMIN))
2488+
goto err_put_ns;
2489+
}
2490+
2491+
ret = single_open(file, &proc_setgroups_show, ns);
2492+
if (ret)
2493+
goto err_put_ns;
2494+
2495+
return 0;
2496+
err_put_ns:
2497+
put_user_ns(ns);
2498+
err:
2499+
return ret;
2500+
}
2501+
2502+
static int proc_setgroups_release(struct inode *inode, struct file *file)
2503+
{
2504+
struct seq_file *seq = file->private_data;
2505+
struct user_namespace *ns = seq->private;
2506+
int ret = single_release(inode, file);
2507+
put_user_ns(ns);
2508+
return ret;
2509+
}
2510+
2511+
static const struct file_operations proc_setgroups_operations = {
2512+
.open = proc_setgroups_open,
2513+
.write = proc_setgroups_write,
2514+
.read = seq_read,
2515+
.llseek = seq_lseek,
2516+
.release = proc_setgroups_release,
2517+
};
24672518
#endif /* CONFIG_USER_NS */
24682519

24692520
static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
@@ -2572,6 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = {
25722623
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
25732624
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
25742625
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
2626+
REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
25752627
#endif
25762628
#ifdef CONFIG_CHECKPOINT_RESTORE
25772629
REG("timers", S_IRUGO, proc_timers_operations),
@@ -2916,6 +2968,7 @@ static const struct pid_entry tid_base_stuff[] = {
29162968
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
29172969
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
29182970
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
2971+
REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
29192972
#endif
29202973
};
29212974

include/linux/cred.h

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ extern void groups_free(struct group_info *);
6868
extern int set_current_groups(struct group_info *);
6969
extern void set_groups(struct cred *, struct group_info *);
7070
extern int groups_search(const struct group_info *, kgid_t);
71+
extern bool may_setgroups(void);
7172

7273
/* access the groups "array" with this macro */
7374
#define GROUP_AT(gi, i) \

include/linux/user_namespace.h

+12
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */
1818
} extent[UID_GID_MAP_MAX_EXTENTS];
1919
};
2020

21+
#define USERNS_SETGROUPS_ALLOWED 1UL
22+
23+
#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
24+
2125
struct user_namespace {
2226
struct uid_gid_map uid_map;
2327
struct uid_gid_map gid_map;
@@ -28,6 +32,7 @@ struct user_namespace {
2832
kuid_t owner;
2933
kgid_t group;
3034
struct ns_common ns;
35+
unsigned long flags;
3136

3237
/* Register of per-UID persistent keyrings for this namespace */
3338
#ifdef CONFIG_PERSISTENT_KEYRINGS
@@ -64,6 +69,9 @@ extern const struct seq_operations proc_projid_seq_operations;
6469
extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
6570
extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
6671
extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
72+
extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
73+
extern int proc_setgroups_show(struct seq_file *m, void *v);
74+
extern bool userns_may_setgroups(const struct user_namespace *ns);
6775
#else
6876

6977
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -88,6 +96,10 @@ static inline void put_user_ns(struct user_namespace *ns)
8896
{
8997
}
9098

99+
static inline bool userns_may_setgroups(const struct user_namespace *ns)
100+
{
101+
return true;
102+
}
91103
#endif
92104

93105
#endif /* _LINUX_USER_H */

kernel/groups.c

+10-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <linux/slab.h>
77
#include <linux/security.h>
88
#include <linux/syscalls.h>
9+
#include <linux/user_namespace.h>
910
#include <asm/uaccess.h>
1011

1112
/* init to 2 - one for init_task, one to ensure it is never freed */
@@ -213,6 +214,14 @@ SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
213214
return i;
214215
}
215216

217+
bool may_setgroups(void)
218+
{
219+
struct user_namespace *user_ns = current_user_ns();
220+
221+
return ns_capable(user_ns, CAP_SETGID) &&
222+
userns_may_setgroups(user_ns);
223+
}
224+
216225
/*
217226
* SMP: Our groups are copy-on-write. We can set them safely
218227
* without another task interfering.
@@ -223,7 +232,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
223232
struct group_info *group_info;
224233
int retval;
225234

226-
if (!ns_capable(current_user_ns(), CAP_SETGID))
235+
if (!may_setgroups())
227236
return -EPERM;
228237
if ((unsigned)gidsetsize > NGROUPS_MAX)
229238
return -EINVAL;

kernel/uid16.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
176176
struct group_info *group_info;
177177
int retval;
178178

179-
if (!ns_capable(current_user_ns(), CAP_SETGID))
179+
if (!may_setgroups())
180180
return -EPERM;
181181
if ((unsigned)gidsetsize > NGROUPS_MAX)
182182
return -EINVAL;

kernel/user.c

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ struct user_namespace init_user_ns = {
5454
#ifdef CONFIG_USER_NS
5555
.ns.ops = &userns_operations,
5656
#endif
57+
.flags = USERNS_INIT_FLAGS,
5758
#ifdef CONFIG_PERSISTENT_KEYRINGS
5859
.persistent_keyring_register_sem =
5960
__RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem),

0 commit comments

Comments
 (0)