Skip to content

Commit

Permalink
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel…
Browse files Browse the repository at this point in the history
…/git/ebiederm/user-namespace

Pull exec/proc updates from Eric Biederman:
 "This contains two significant pieces of work: the work to sort out
  proc_flush_task, and the work to solve a deadlock between strace and
  exec.

  Fixing proc_flush_task so that it no longer requires a persistent
  mount makes improvements to proc possible. The removal of the
  persistent mount solves an old regression that that caused the hidepid
  mount option to only work on remount not on mount. The regression was
  found and reported by the Android folks. This further allows Alexey
  Gladkov's work making proc mount options specific to an individual
  mount of proc to move forward.

  The work on exec starts solving a long standing issue with exec that
  it takes mutexes of blocking userspace applications, which makes exec
  extremely deadlock prone. For the moment this adds a second mutex with
  a narrower scope that handles all of the easy cases. Which makes the
  tricky cases easy to spot. With a little luck the code to solve those
  deadlocks will be ready by next merge window"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (25 commits)
  signal: Extend exec_id to 64bits
  pidfd: Use new infrastructure to fix deadlocks in execve
  perf: Use new infrastructure to fix deadlocks in execve
  proc: io_accounting: Use new infrastructure to fix deadlocks in execve
  proc: Use new infrastructure to fix deadlocks in execve
  kernel/kcmp.c: Use new infrastructure to fix deadlocks in execve
  kernel: doc: remove outdated comment cred.c
  mm: docs: Fix a comment in process_vm_rw_core
  selftests/ptrace: add test cases for dead-locks
  exec: Fix a deadlock in strace
  exec: Add exec_update_mutex to replace cred_guard_mutex
  exec: Move exec_mmap right after de_thread in flush_old_exec
  exec: Move cleanup of posix timers on exec out of de_thread
  exec: Factor unshare_sighand out of de_thread and call it separately
  exec: Only compute current once in flush_old_exec
  pid: Improve the comment about waiting in zap_pid_ns_processes
  proc: Remove the now unnecessary internal mount of proc
  uml: Create a private mount of proc for mconsole
  uml: Don't consult current to find the proc_mnt in mconsole_proc
  proc: Use a list of inodes to flush from proc
  ...
  • Loading branch information
torvalds committed Apr 2, 2020
2 parents 919dce2 + d1e7fd6 commit d987ca1
Show file tree
Hide file tree
Showing 26 changed files with 349 additions and 248 deletions.
28 changes: 27 additions & 1 deletion arch/um/drivers/mconsole_kern.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
#include "mconsole_kern.h"
#include <os.h>

static struct vfsmount *proc_mnt = NULL;

static int do_unlink_socket(struct notifier_block *notifier,
unsigned long what, void *data)
{
Expand Down Expand Up @@ -123,7 +125,7 @@ void mconsole_log(struct mc_request *req)

void mconsole_proc(struct mc_request *req)
{
struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
struct vfsmount *mnt = proc_mnt;
char *buf;
int len;
struct file *file;
Expand All @@ -134,6 +136,10 @@ void mconsole_proc(struct mc_request *req)
ptr += strlen("proc");
ptr = skip_spaces(ptr);

if (!mnt) {
mconsole_reply(req, "Proc not available", 1, 0);
goto out;
}
file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY, 0);
if (IS_ERR(file)) {
mconsole_reply(req, "Failed to open file", 1, 0);
Expand Down Expand Up @@ -683,6 +689,24 @@ void mconsole_stack(struct mc_request *req)
with_console(req, stack_proc, to);
}

static int __init mount_proc(void)
{
struct file_system_type *proc_fs_type;
struct vfsmount *mnt;

proc_fs_type = get_fs_type("proc");
if (!proc_fs_type)
return -ENODEV;

mnt = kern_mount(proc_fs_type);
put_filesystem(proc_fs_type);
if (IS_ERR(mnt))
return PTR_ERR(mnt);

proc_mnt = mnt;
return 0;
}

/*
* Changed by mconsole_setup, which is __setup, and called before SMP is
* active.
Expand All @@ -696,6 +720,8 @@ static int __init mconsole_init(void)
int err;
char file[UNIX_PATH_MAX];

mount_proc();

if (umid_file_name("mconsole", file, sizeof(file)))
return -1;
snprintf(mconsole_socket_name, sizeof(file), "%s", file);
Expand Down
80 changes: 55 additions & 25 deletions fs/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1036,16 +1036,26 @@ ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
}
EXPORT_SYMBOL(read_code);

/*
* Maps the mm_struct mm into the current task struct.
* On success, this function returns with the mutex
* exec_update_mutex locked.
*/
static int exec_mmap(struct mm_struct *mm)
{
struct task_struct *tsk;
struct mm_struct *old_mm, *active_mm;
int ret;

/* Notify parent that we're no longer interested in the old VM */
tsk = current;
old_mm = current->mm;
exec_mm_release(tsk, old_mm);

ret = mutex_lock_killable(&tsk->signal->exec_update_mutex);
if (ret)
return ret;

if (old_mm) {
sync_mm_rss(old_mm);
/*
Expand All @@ -1057,9 +1067,11 @@ static int exec_mmap(struct mm_struct *mm)
down_read(&old_mm->mmap_sem);
if (unlikely(old_mm->core_state)) {
up_read(&old_mm->mmap_sem);
mutex_unlock(&tsk->signal->exec_update_mutex);
return -EINTR;
}
}

task_lock(tsk);
active_mm = tsk->active_mm;
membarrier_exec_mmap(mm);
Expand Down Expand Up @@ -1215,10 +1227,22 @@ static int de_thread(struct task_struct *tsk)
/* we have changed execution domain */
tsk->exit_signal = SIGCHLD;

#ifdef CONFIG_POSIX_TIMERS
exit_itimers(sig);
flush_itimer_signals();
#endif
BUG_ON(!thread_group_leader(tsk));
return 0;

killed:
/* protects against exit_notify() and __exit_signal() */
read_lock(&tasklist_lock);
sig->group_exit_task = NULL;
sig->notify_count = 0;
read_unlock(&tasklist_lock);
return -EAGAIN;
}


static int unshare_sighand(struct task_struct *me)
{
struct sighand_struct *oldsighand = me->sighand;

if (refcount_read(&oldsighand->count) != 1) {
struct sighand_struct *newsighand;
Expand All @@ -1236,23 +1260,13 @@ static int de_thread(struct task_struct *tsk)

write_lock_irq(&tasklist_lock);
spin_lock(&oldsighand->siglock);
rcu_assign_pointer(tsk->sighand, newsighand);
rcu_assign_pointer(me->sighand, newsighand);
spin_unlock(&oldsighand->siglock);
write_unlock_irq(&tasklist_lock);

__cleanup_sighand(oldsighand);
}

BUG_ON(!thread_group_leader(tsk));
return 0;

killed:
/* protects against exit_notify() and __exit_signal() */
read_lock(&tasklist_lock);
sig->group_exit_task = NULL;
sig->notify_count = 0;
read_unlock(&tasklist_lock);
return -EAGAIN;
}

char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
Expand Down Expand Up @@ -1286,13 +1300,13 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
*/
int flush_old_exec(struct linux_binprm * bprm)
{
struct task_struct *me = current;
int retval;

/*
* Make sure we have a private signal table and that
* we are unassociated from the previous thread group.
* Make this the only thread in the thread group.
*/
retval = de_thread(current);
retval = de_thread(me);
if (retval)
goto out;

Expand All @@ -1312,26 +1326,39 @@ int flush_old_exec(struct linux_binprm * bprm)
goto out;

/*
* After clearing bprm->mm (to mark that current is using the
* prepared mm now), we have nothing left of the original
* After setting bprm->called_exec_mmap (to mark that current is
* using the prepared mm now), we have nothing left of the original
* process. If anything from here on returns an error, the check
* in search_binary_handler() will SEGV current.
*/
bprm->called_exec_mmap = 1;
bprm->mm = NULL;

#ifdef CONFIG_POSIX_TIMERS
exit_itimers(me->signal);
flush_itimer_signals();
#endif

/*
* Make the signal table private.
*/
retval = unshare_sighand(me);
if (retval)
goto out;

set_fs(USER_DS);
current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
PF_NOFREEZE | PF_NO_SETAFFINITY);
flush_thread();
current->personality &= ~bprm->per_clear;
me->personality &= ~bprm->per_clear;

/*
* We have to apply CLOEXEC before we change whether the process is
* dumpable (in setup_new_exec) to avoid a race with a process in userspace
* trying to access the should-be-closed file descriptors of a process
* undergoing exec(2).
*/
do_close_on_exec(current->files);
do_close_on_exec(me->files);
return 0;

out:
Expand Down Expand Up @@ -1412,7 +1439,7 @@ void setup_new_exec(struct linux_binprm * bprm)

/* An exec changes our domain. We are no longer part of the thread
group */
current->self_exec_id++;
WRITE_ONCE(current->self_exec_id, current->self_exec_id + 1);
flush_signal_handlers(current, 0);
}
EXPORT_SYMBOL(setup_new_exec);
Expand Down Expand Up @@ -1450,6 +1477,8 @@ static void free_bprm(struct linux_binprm *bprm)
{
free_arg_pages(bprm);
if (bprm->cred) {
if (bprm->called_exec_mmap)
mutex_unlock(&current->signal->exec_update_mutex);
mutex_unlock(&current->signal->cred_guard_mutex);
abort_creds(bprm->cred);
}
Expand Down Expand Up @@ -1499,6 +1528,7 @@ void install_exec_creds(struct linux_binprm *bprm)
* credentials; any time after this it may be unlocked.
*/
security_bprm_committed_creds(bprm);
mutex_unlock(&current->signal->exec_update_mutex);
mutex_unlock(&current->signal->cred_guard_mutex);
}
EXPORT_SYMBOL(install_exec_creds);
Expand Down Expand Up @@ -1690,7 +1720,7 @@ int search_binary_handler(struct linux_binprm *bprm)

read_lock(&binfmt_lock);
put_binfmt(fmt);
if (retval < 0 && !bprm->mm) {
if (retval < 0 && bprm->called_exec_mmap) {
/* we got to flush_old_exec() and failed after it */
read_unlock(&binfmt_lock);
force_sigsegv(SIGSEGV);
Expand Down
Loading

0 comments on commit d987ca1

Please sign in to comment.