Skip to content

Commit 51f39a1

Browse files
daviddrysdale authored and torvalds committed Dec 13, 2014
syscalls: implement execveat() system call
This patchset adds execveat(2) for x86, and is derived from Meredydd Luff's patch from Sept 2012 (https://lkml.org/lkml/2012/9/11/528).

The primary aim of adding an execveat syscall is to allow an implementation of fexecve(3) that does not rely on the /proc filesystem, at least for executables (rather than scripts). The current glibc version of fexecve(3) is implemented via /proc, which causes problems in sandboxed or otherwise restricted environments.

Given the desire for a /proc-free fexecve() implementation, HPA suggested (https://lkml.org/lkml/2006/7/11/556) that an execveat(2) syscall would be an appropriate generalization.

Also, having a new syscall means that it can take a flags argument without back-compatibility concerns. The current implementation just defines the AT_EMPTY_PATH and AT_SYMLINK_NOFOLLOW flags, but other flags could be added in future -- for example, flags for new namespaces (as suggested at https://lkml.org/lkml/2006/7/11/474).

Related history:
- https://lkml.org/lkml/2006/12/27/123 is an example of someone realizing that fexecve() is likely to fail in a chroot environment.
- http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=514043 covered documenting the /proc requirement of fexecve(3) in its manpage, to "prevent other people from wasting their time".
- https://bugzilla.redhat.com/show_bug.cgi?id=241609 described a problem where a process that did setuid() could not fexecve() because it no longer had access to /proc/self/fd; this has since been fixed.

This patch (of 4):

Add a new execveat(2) system call. execveat() is to execve() as openat() is to open(): it takes a file descriptor that refers to a directory, and resolves the filename relative to that.

In addition, if the filename is empty and AT_EMPTY_PATH is specified, execveat() executes the file to which the file descriptor refers.
This replicates the functionality of fexecve(), which is a system call in other UNIXen, but in Linux glibc it depends on opening "/proc/self/fd/<fd>" (and so relies on /proc being mounted).

The filename fed to the executed program as argv[0] (or the name of the script fed to a script interpreter) will be of the form "/dev/fd/<fd>" (for an empty filename) or "/dev/fd/<fd>/<filename>", effectively reflecting how the executable was found. This does however mean that execution of a script in a /proc-less environment won't work; also, script execution via an O_CLOEXEC file descriptor fails (as the file will not be accessible after exec).

Based on patches by Meredydd Luff.

Signed-off-by: David Drysdale <[email protected]>
Cc: Meredydd Luff <[email protected]>
Cc: Shuah Khan <[email protected]>
Cc: "Eric W. Biederman" <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Alexander Viro <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Rich Felker <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Michael Kerrisk <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent c0ef0cc commit 51f39a1

File tree

13 files changed

+145
-15
lines changed

13 files changed

+145
-15
lines changed
 

‎fs/binfmt_em86.c

+4
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ static int load_em86(struct linux_binprm *bprm)
4242
return -ENOEXEC;
4343
}
4444

45+
/* Need to be able to load the file after exec */
46+
if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
47+
return -ENOENT;
48+
4549
allow_write_access(bprm->file);
4650
fput(bprm->file);
4751
bprm->file = NULL;

‎fs/binfmt_misc.c

+4
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,10 @@ static int load_misc_binary(struct linux_binprm *bprm)
144144
if (!fmt)
145145
goto ret;
146146

147+
/* Need to be able to load the file after exec */
148+
if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
149+
return -ENOENT;
150+
147151
if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
148152
retval = remove_arg_zero(bprm);
149153
if (retval)

‎fs/binfmt_script.c

+10
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,16 @@ static int load_script(struct linux_binprm *bprm)
2424

2525
if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
2626
return -ENOEXEC;
27+
28+
/*
29+
* If the script filename will be inaccessible after exec, typically
30+
* because it is a "/dev/fd/<fd>/.." path against an O_CLOEXEC fd, give
31+
* up now (on the assumption that the interpreter will want to load
32+
* this file).
33+
*/
34+
if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
35+
return -ENOENT;
36+
2737
/*
2838
* This section does the #! interpretation.
2939
* Sorta complicated, but hopefully it will work. -TYT

‎fs/exec.c

+100-13
Original file line numberDiff line numberDiff line change
@@ -748,18 +748,25 @@ EXPORT_SYMBOL(setup_arg_pages);
748748

749749
#endif /* CONFIG_MMU */
750750

751-
static struct file *do_open_exec(struct filename *name)
751+
static struct file *do_open_execat(int fd, struct filename *name, int flags)
752752
{
753753
struct file *file;
754754
int err;
755-
static const struct open_flags open_exec_flags = {
755+
struct open_flags open_exec_flags = {
756756
.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
757757
.acc_mode = MAY_EXEC | MAY_OPEN,
758758
.intent = LOOKUP_OPEN,
759759
.lookup_flags = LOOKUP_FOLLOW,
760760
};
761761

762-
file = do_filp_open(AT_FDCWD, name, &open_exec_flags);
762+
if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
763+
return ERR_PTR(-EINVAL);
764+
if (flags & AT_SYMLINK_NOFOLLOW)
765+
open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
766+
if (flags & AT_EMPTY_PATH)
767+
open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
768+
769+
file = do_filp_open(fd, name, &open_exec_flags);
763770
if (IS_ERR(file))
764771
goto out;
765772

@@ -770,12 +777,13 @@ static struct file *do_open_exec(struct filename *name)
770777
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
771778
goto exit;
772779

773-
fsnotify_open(file);
774-
775780
err = deny_write_access(file);
776781
if (err)
777782
goto exit;
778783

784+
if (name->name[0] != '\0')
785+
fsnotify_open(file);
786+
779787
out:
780788
return file;
781789

@@ -787,7 +795,7 @@ static struct file *do_open_exec(struct filename *name)
787795
struct file *open_exec(const char *name)
788796
{
789797
struct filename tmp = { .name = name };
790-
return do_open_exec(&tmp);
798+
return do_open_execat(AT_FDCWD, &tmp, 0);
791799
}
792800
EXPORT_SYMBOL(open_exec);
793801

@@ -1428,10 +1436,12 @@ static int exec_binprm(struct linux_binprm *bprm)
14281436
/*
14291437
* sys_execve() executes a new program.
14301438
*/
1431-
static int do_execve_common(struct filename *filename,
1432-
struct user_arg_ptr argv,
1433-
struct user_arg_ptr envp)
1439+
static int do_execveat_common(int fd, struct filename *filename,
1440+
struct user_arg_ptr argv,
1441+
struct user_arg_ptr envp,
1442+
int flags)
14341443
{
1444+
char *pathbuf = NULL;
14351445
struct linux_binprm *bprm;
14361446
struct file *file;
14371447
struct files_struct *displaced;
@@ -1472,15 +1482,36 @@ static int do_execve_common(struct filename *filename,
14721482
check_unsafe_exec(bprm);
14731483
current->in_execve = 1;
14741484

1475-
file = do_open_exec(filename);
1485+
file = do_open_execat(fd, filename, flags);
14761486
retval = PTR_ERR(file);
14771487
if (IS_ERR(file))
14781488
goto out_unmark;
14791489

14801490
sched_exec();
14811491

14821492
bprm->file = file;
1483-
bprm->filename = bprm->interp = filename->name;
1493+
if (fd == AT_FDCWD || filename->name[0] == '/') {
1494+
bprm->filename = filename->name;
1495+
} else {
1496+
if (filename->name[0] == '\0')
1497+
pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d", fd);
1498+
else
1499+
pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d/%s",
1500+
fd, filename->name);
1501+
if (!pathbuf) {
1502+
retval = -ENOMEM;
1503+
goto out_unmark;
1504+
}
1505+
/*
1506+
* Record that a name derived from an O_CLOEXEC fd will be
1507+
* inaccessible after exec. Relies on having exclusive access to
1508+
* current->files (due to unshare_files above).
1509+
*/
1510+
if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
1511+
bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
1512+
bprm->filename = pathbuf;
1513+
}
1514+
bprm->interp = bprm->filename;
14841515

14851516
retval = bprm_mm_init(bprm);
14861517
if (retval)
@@ -1521,6 +1552,7 @@ static int do_execve_common(struct filename *filename,
15211552
acct_update_integrals(current);
15221553
task_numa_free(current);
15231554
free_bprm(bprm);
1555+
kfree(pathbuf);
15241556
putname(filename);
15251557
if (displaced)
15261558
put_files_struct(displaced);
@@ -1538,6 +1570,7 @@ static int do_execve_common(struct filename *filename,
15381570

15391571
out_free:
15401572
free_bprm(bprm);
1573+
kfree(pathbuf);
15411574

15421575
out_files:
15431576
if (displaced)
@@ -1553,7 +1586,18 @@ int do_execve(struct filename *filename,
15531586
{
15541587
struct user_arg_ptr argv = { .ptr.native = __argv };
15551588
struct user_arg_ptr envp = { .ptr.native = __envp };
1556-
return do_execve_common(filename, argv, envp);
1589+
return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
1590+
}
1591+
1592+
int do_execveat(int fd, struct filename *filename,
1593+
const char __user *const __user *__argv,
1594+
const char __user *const __user *__envp,
1595+
int flags)
1596+
{
1597+
struct user_arg_ptr argv = { .ptr.native = __argv };
1598+
struct user_arg_ptr envp = { .ptr.native = __envp };
1599+
1600+
return do_execveat_common(fd, filename, argv, envp, flags);
15571601
}
15581602

15591603
#ifdef CONFIG_COMPAT
@@ -1569,7 +1613,23 @@ static int compat_do_execve(struct filename *filename,
15691613
.is_compat = true,
15701614
.ptr.compat = __envp,
15711615
};
1572-
return do_execve_common(filename, argv, envp);
1616+
return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
1617+
}
1618+
1619+
static int compat_do_execveat(int fd, struct filename *filename,
1620+
const compat_uptr_t __user *__argv,
1621+
const compat_uptr_t __user *__envp,
1622+
int flags)
1623+
{
1624+
struct user_arg_ptr argv = {
1625+
.is_compat = true,
1626+
.ptr.compat = __argv,
1627+
};
1628+
struct user_arg_ptr envp = {
1629+
.is_compat = true,
1630+
.ptr.compat = __envp,
1631+
};
1632+
return do_execveat_common(fd, filename, argv, envp, flags);
15731633
}
15741634
#endif
15751635

@@ -1609,11 +1669,38 @@ SYSCALL_DEFINE3(execve,
16091669
{
16101670
return do_execve(getname(filename), argv, envp);
16111671
}
1672+
1673+
SYSCALL_DEFINE5(execveat,
1674+
int, fd, const char __user *, filename,
1675+
const char __user *const __user *, argv,
1676+
const char __user *const __user *, envp,
1677+
int, flags)
1678+
{
1679+
int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
1680+
1681+
return do_execveat(fd,
1682+
getname_flags(filename, lookup_flags, NULL),
1683+
argv, envp, flags);
1684+
}
1685+
16121686
#ifdef CONFIG_COMPAT
16131687
COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
16141688
const compat_uptr_t __user *, argv,
16151689
const compat_uptr_t __user *, envp)
16161690
{
16171691
return compat_do_execve(getname(filename), argv, envp);
16181692
}
1693+
1694+
COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
1695+
const char __user *, filename,
1696+
const compat_uptr_t __user *, argv,
1697+
const compat_uptr_t __user *, envp,
1698+
int, flags)
1699+
{
1700+
int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
1701+
1702+
return compat_do_execveat(fd,
1703+
getname_flags(filename, lookup_flags, NULL),
1704+
argv, envp, flags);
1705+
}
16191706
#endif

‎fs/namei.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ void final_putname(struct filename *name)
130130

131131
#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename))
132132

133-
static struct filename *
133+
struct filename *
134134
getname_flags(const char __user *filename, int flags, int *empty)
135135
{
136136
struct filename *result, *err;

‎include/linux/binfmts.h

+4
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ struct linux_binprm {
5353
#define BINPRM_FLAGS_EXECFD_BIT 1
5454
#define BINPRM_FLAGS_EXECFD (1 << BINPRM_FLAGS_EXECFD_BIT)
5555

56+
/* filename of the binary will be inaccessible after exec */
57+
#define BINPRM_FLAGS_PATH_INACCESSIBLE_BIT 2
58+
#define BINPRM_FLAGS_PATH_INACCESSIBLE (1 << BINPRM_FLAGS_PATH_INACCESSIBLE_BIT)
59+
5660
/* Function parameter for binfmt->coredump */
5761
struct coredump_params {
5862
const siginfo_t *siginfo;

‎include/linux/compat.h

+3
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,9 @@ asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int);
357357

358358
asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv,
359359
const compat_uptr_t __user *envp);
360+
asmlinkage long compat_sys_execveat(int dfd, const char __user *filename,
361+
const compat_uptr_t __user *argv,
362+
const compat_uptr_t __user *envp, int flags);
360363

361364
asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
362365
compat_ulong_t __user *outp, compat_ulong_t __user *exp,

‎include/linux/fs.h

+1
Original file line numberDiff line numberDiff line change
@@ -2096,6 +2096,7 @@ extern int vfs_open(const struct path *, struct file *, const struct cred *);
20962096
extern struct file * dentry_open(const struct path *, int, const struct cred *);
20972097
extern int filp_close(struct file *, fl_owner_t id);
20982098

2099+
extern struct filename *getname_flags(const char __user *, int, int *);
20992100
extern struct filename *getname(const char __user *);
21002101
extern struct filename *getname_kernel(const char *);
21012102

‎include/linux/sched.h

+4
Original file line numberDiff line numberDiff line change
@@ -2485,6 +2485,10 @@ extern void do_group_exit(int);
24852485
extern int do_execve(struct filename *,
24862486
const char __user * const __user *,
24872487
const char __user * const __user *);
2488+
extern int do_execveat(int, struct filename *,
2489+
const char __user * const __user *,
2490+
const char __user * const __user *,
2491+
int);
24882492
extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
24892493
struct task_struct *fork_idle(int);
24902494
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);

‎include/linux/syscalls.h

+5
Original file line numberDiff line numberDiff line change
@@ -877,4 +877,9 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
877877
asmlinkage long sys_getrandom(char __user *buf, size_t count,
878878
unsigned int flags);
879879
asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
880+
881+
asmlinkage long sys_execveat(int dfd, const char __user *filename,
882+
const char __user *const __user *argv,
883+
const char __user *const __user *envp, int flags);
884+
880885
#endif

‎include/uapi/asm-generic/unistd.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -707,9 +707,11 @@ __SYSCALL(__NR_getrandom, sys_getrandom)
707707
__SYSCALL(__NR_memfd_create, sys_memfd_create)
708708
#define __NR_bpf 280
709709
__SYSCALL(__NR_bpf, sys_bpf)
710+
#define __NR_execveat 281
711+
__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
710712

711713
#undef __NR_syscalls
712-
#define __NR_syscalls 281
714+
#define __NR_syscalls 282
713715

714716
/*
715717
* All syscalls below here should go away really,

‎kernel/sys_ni.c

+3
Original file line numberDiff line numberDiff line change
@@ -226,3 +226,6 @@ cond_syscall(sys_seccomp);
226226

227227
/* access BPF programs and maps */
228228
cond_syscall(sys_bpf);
229+
230+
/* execveat */
231+
cond_syscall(sys_execveat);

‎lib/audit.c

+3
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ int audit_classify_syscall(int abi, unsigned syscall)
5353
#ifdef __NR_socketcall
5454
case __NR_socketcall:
5555
return 4;
56+
#endif
57+
#ifdef __NR_execveat
58+
case __NR_execveat:
5659
#endif
5760
case __NR_execve:
5861
return 5;

0 commit comments

Comments
 (0)
Please sign in to comment.