Skip to content

Commit

Permalink
prctl: avoid using mmap_sem for exe_file serialization
Browse files Browse the repository at this point in the history
Oleg cleverly suggested using xchg() to set the new mm->exe_file instead
of calling set_mm_exe_file() which requires some form of serialization --
mmap_sem in this case.  For archs that do not have atomic rmw instructions
we still fallback to a spinlock alternative, so this should always be
safe.  As such, we only need the mmap_sem for looking up the backing
vm_file, which can be done sharing the lock.  Naturally, this means we
need to manually deal with both the new and old file reference counting,
and we need not worry about the MMF_EXE_FILE_CHANGED bits, which can
probably be deleted in the future anyway.

Signed-off-by: Davidlohr Bueso <[email protected]>
Suggested-by: Oleg Nesterov <[email protected]>
Acked-by: Oleg Nesterov <[email protected]>
Reviewed-by: Konstantin Khlebnikov <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Davidlohr Bueso authored and torvalds committed Apr 17, 2015
1 parent 90f31d0 commit 6e399cd
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 25 deletions.
6 changes: 6 additions & 0 deletions fs/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1078,7 +1078,13 @@ int flush_old_exec(struct linux_binprm * bprm)
if (retval)
goto out;

/*
* Must be called _before_ exec_mmap() as bprm->mm is
* not visibile until then. This also enables the update
* to be lockless.
*/
set_mm_exe_file(bprm->mm, bprm->file);

/*
* Release all of the old mmap stuff
*/
Expand Down
19 changes: 13 additions & 6 deletions kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -711,15 +711,22 @@ EXPORT_SYMBOL_GPL(mmput);
*
* This changes mm's executable file (shown as symlink /proc/[pid]/exe).
*
* Main users are mmput(), sys_execve() and sys_prctl(PR_SET_MM_MAP/EXE_FILE).
* Callers prevent concurrent invocations: in mmput() nobody alive left,
* in execve task is single-threaded, prctl holds mmap_sem exclusively.
* Main users are mmput() and sys_execve(). Callers prevent concurrent
* invocations: in mmput() nobody alive left, in execve task is single
* threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
* mm->exe_file, but does so without using set_mm_exe_file() in order
* to do avoid the need for any locks.
*/
void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
{
struct file *old_exe_file = rcu_dereference_protected(mm->exe_file,
!atomic_read(&mm->mm_users) || current->in_execve ||
lockdep_is_held(&mm->mmap_sem));
struct file *old_exe_file;

/*
* It is safe to dereference the exe_file without RCU as
* this function is only called if nobody else can access
* this mm -- see comment above for justification.
*/
old_exe_file = rcu_dereference_raw(mm->exe_file);

if (new_exe_file)
get_file(new_exe_file);
Expand Down
47 changes: 28 additions & 19 deletions kernel/sys.c
Original file line number Diff line number Diff line change
Expand Up @@ -1649,14 +1649,13 @@ SYSCALL_DEFINE1(umask, int, mask)
return mask;
}

static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{
struct fd exe;
struct file *old_exe, *exe_file;
struct inode *inode;
int err;

VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);

exe = fdget(fd);
if (!exe.file)
return -EBADF;
Expand All @@ -1680,15 +1679,22 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
/*
* Forbid mm->exe_file change if old file still mapped.
*/
exe_file = get_mm_exe_file(mm);
err = -EBUSY;
if (mm->exe_file) {
if (exe_file) {
struct vm_area_struct *vma;

for (vma = mm->mmap; vma; vma = vma->vm_next)
if (vma->vm_file &&
path_equal(&vma->vm_file->f_path,
&mm->exe_file->f_path))
goto exit;
down_read(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (!vma->vm_file)
continue;
if (path_equal(&vma->vm_file->f_path,
&exe_file->f_path))
goto exit_err;
}

up_read(&mm->mmap_sem);
fput(exe_file);
}

/*
Expand All @@ -1702,10 +1708,18 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
goto exit;

err = 0;
set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */
/* set the new file, lockless */
get_file(exe.file);
old_exe = xchg(&mm->exe_file, exe.file);
if (old_exe)
fput(old_exe);
exit:
fdput(exe);
return err;
exit_err:
up_read(&mm->mmap_sem);
fput(exe_file);
goto exit;
}

#ifdef CONFIG_CHECKPOINT_RESTORE
Expand Down Expand Up @@ -1840,10 +1854,9 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
}

down_write(&mm->mmap_sem);
if (prctl_map.exe_fd != (u32)-1)
error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd);
downgrade_write(&mm->mmap_sem);
error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
down_read(&mm->mmap_sem);
if (error)
goto out;

Expand Down Expand Up @@ -1909,12 +1922,8 @@ static int prctl_set_mm(int opt, unsigned long addr,
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;

if (opt == PR_SET_MM_EXE_FILE) {
down_write(&mm->mmap_sem);
error = prctl_set_mm_exe_file_locked(mm, (unsigned int)addr);
up_write(&mm->mmap_sem);
return error;
}
if (opt == PR_SET_MM_EXE_FILE)
return prctl_set_mm_exe_file(mm, (unsigned int)addr);

if (addr >= TASK_SIZE || addr < mmap_min_addr)
return -EINVAL;
Expand Down

0 comments on commit 6e399cd

Please sign in to comment.