From 5fee5513072de85fd25c79479d3c5dac3c179220 Mon Sep 17 00:00:00 2001
From: Theodore Dubois
Date: Mon, 15 Jan 2018 19:50:52 -0800
Subject: [PATCH] Implement close on exec

---
 fs/fd.c       | 227 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fdtable.h  |  27 ++++++
 fs/poll.h     |  39 +++++++++
 kernel/exec.c |  21 +++--
 util/bits.h   |  26 ++++++
 5 files changed, 331 insertions(+), 9 deletions(-)
 create mode 100644 fs/fd.c
 create mode 100644 fs/fdtable.h
 create mode 100644 fs/poll.h
 create mode 100644 util/bits.h

diff --git a/fs/fd.c b/fs/fd.c
new file mode 100644
--- /dev/null
+++ b/fs/fd.c
@@ -0,0 +1,227 @@
+#include <stdlib.h>
+#include <string.h>
+#include "kernel/process.h"
+#include "kernel/errno.h"
+#include "kernel/resource.h"
+#include "kernel/fs.h"
+#include "fs/fdtable.h"
+
+// Allocates a table holding one reference with room for `size` descriptors.
+// Returns an ERR_PTR instead of a table when memory runs out.
+struct fdtable *fdtable_alloc(unsigned size) {
+    struct fdtable *fdt = malloc(sizeof(struct fdtable));
+    if (fdt == NULL)
+        return ERR_PTR(_ENOMEM);
+    fdt->refcount = 1;
+    fdt->size = 0;
+    fdt->files = NULL;
+    fdt->cloexec = NULL;
+    int err = fdtable_resize(fdt, size);
+    if (err < 0) {
+        free(fdt); // nobody else owns the half-built table yet
+        return ERR_PTR(err);
+    }
+    return fdt;
+}
+
+// Drops one reference; the last one closes every open fd and frees the table.
+void fdtable_release(struct fdtable *table) {
+    if (--table->refcount == 0) {
+        // close through the table itself: f_close() only ever looks at
+        // current->files, which need not be the table being released
+        for (unsigned i = 0; i < table->size; i++)
+            if (table->files[i] != NULL)
+                fd_close(table->files[i]);
+        free(table->files);
+        free(table->cloexec);
+        free(table);
+    }
+}
+
+// Grows the table to `size` slots, keeping existing entries and cloexec bits.
+// Returns 0, or _ENOMEM with the table left untouched.
+int fdtable_resize(struct fdtable *table, unsigned size) {
+    // currently the only legitimate use of this is to expand the table
+    assert(size > table->size);
+
+    struct fd **files = calloc(size, sizeof(struct fd *));
+    if (files == NULL)
+        return _ENOMEM;
+    bits_t *cloexec = calloc(BITS_SIZE(size), 1);
+    if (cloexec == NULL) {
+        free(files); // don't leak the first allocation
+        return _ENOMEM;
+    }
+    if (table->files)
+        memcpy(files, table->files, sizeof(struct fd *) * table->size);
+    if (table->cloexec)
+        memcpy(cloexec, table->cloexec, BITS_SIZE(table->size));
+
+    free(table->files);
+    table->files = files;
+    free(table->cloexec);
+    table->cloexec = cloexec;
+    table->size = size;
+    return 0;
+}
+
+// Builds a new table that shares the open files of `table` (taking a reference
+// on each) and copies its cloexec flags. Returns an ERR_PTR on failure.
+struct fdtable *fdtable_copy(struct fdtable *table) {
+    unsigned size = table->size;
+    struct fdtable *new_table = fdtable_alloc(size);
+    if (IS_ERR(new_table))
+        return new_table;
+    memcpy(new_table->files, table->files, sizeof(struct fd *) * size);
+    for (unsigned i = 0; i < size; i++)
+        if (new_table->files[i])
+            new_table->files[i]->refcount++;
+    memcpy(new_table->cloexec, table->cloexec, BITS_SIZE(size));
+    return new_table;
+}
+
+static inline bool f_in_range(fd_t f) {
+    return f >= 0 && (unsigned) f < current->files->size;
+}
+
+struct fd *f_get(fd_t f) {
+    if (!f_in_range(f))
+        return NULL;
+    return current->files->files[f];
+}
+
+bool f_is_cloexec(fd_t f) {
+    // an fd outside the table has no bit to read
+    return f_in_range(f) && bit_test(f, current->files->cloexec);
+}
+
+// Makes sure slot `max` exists in the table. Returns 0, _EBADF for a negative
+// fd, _EMFILE past RLIMIT_NOFILE, or the error from fdtable_resize.
+static int fdtable_expand(struct fdtable *table, fd_t max) {
+    // -1 would wrap to size 0, pass the check below and index before the array
+    if (max < 0)
+        return _EBADF;
+    unsigned size = (unsigned) max + 1;
+    // check the limit first so that a limit lowered below the current table
+    // size is still enforced
+    if (size > rlimit(RLIMIT_NOFILE_))
+        return _EMFILE;
+    if (table->size >= size)
+        return 0;
+    return fdtable_resize(table, size);
+}
+
+// Puts fd in the lowest free slot >= start. Steals the caller's reference:
+// the table keeps it on success, it is dropped on error.
+static fd_t f_install_start(struct fd *fd, fd_t start) {
+    struct fdtable *table = current->files;
+    if (start < 0) {
+        fd_close(fd);
+        return _EINVAL;
+    }
+    unsigned size = rlimit(RLIMIT_NOFILE_);
+    if (size > table->size)
+        size = table->size;
+
+    fd_t f;
+    for (f = start; (unsigned) f < size; f++)
+        if (table->files[f] == NULL)
+            break;
+    if ((unsigned) f >= size) {
+        int err = fdtable_expand(table, f);
+        if (err < 0)
+            f = err;
+    }
+
+    if (f >= 0) {
+        table->files[f] = fd;
+        // a reused slot must not inherit the flag of its previous occupant
+        bit_clear(f, table->cloexec);
+    } else {
+        fd_close(fd);
+    }
+    return f;
+}
+
+fd_t f_install(struct fd *fd) {
+    return f_install_start(fd, 0);
+}
+
+int f_close(fd_t f) {
+    struct fdtable *table = current->files;
+    struct fd *fd = f_get(f);
+    if (fd == NULL)
+        return _EBADF;
+    int err = fd_close(fd);
+    table->files[f] = NULL;
+    // the flag belongs to the descriptor, so it goes away with it
+    bit_clear(f, table->cloexec);
+    return err;
+}
+
+dword_t sys_close(fd_t f) {
+    STRACE("close(%d)", f);
+    return f_close(f);
+}
+
+#define F_DUPFD_ 0
+#define F_GETFD_ 1
+#define F_SETFD_ 2
+#define F_GETFL_ 3
+#define F_SETFL_ 4
+
+dword_t sys_dup(fd_t f) {
+    struct fd *fd = f_get(f);
+    if (fd == NULL)
+        return _EBADF;
+    fd->refcount++;
+    return f_install(fd);
+}
+
+dword_t sys_dup2(fd_t f, fd_t new_f) {
+    STRACE("dup2(%d, %d)\n", f, new_f);
+    struct fdtable *table = current->files;
+    struct fd *fd = f_get(f);
+    if (fd == NULL)
+        return _EBADF;
+    // duplicating an fd onto itself is a no-op; closing it first could drop
+    // the last reference before it is retaken
+    if (f == new_f)
+        return new_f;
+    int err = fdtable_expand(table, new_f);
+    if (err < 0)
+        return err;
+    f_close(new_f);
+    fd->refcount++;
+    table->files[new_f] = fd;
+    bit_clear(new_f, table->cloexec);
+    // dup2 reports the new descriptor, not 0
+    return new_f;
+}
+
+dword_t sys_fcntl64(fd_t f, dword_t cmd, dword_t arg) {
+    struct fdtable *table = current->files;
+    struct fd *fd = f_get(f);
+    if (fd == NULL)
+        return _EBADF;
+    switch (cmd) {
+        case F_DUPFD_:
+            STRACE("fcntl(%d, F_DUPFD, %d)", f, arg);
+            fd->refcount++;
+            return f_install_start(fd, arg);
+
+        case F_GETFD_:
+            STRACE("fcntl(%d, F_GETFD)", f);
+            return bit_test(f, table->cloexec);
+        case F_SETFD_:
+            STRACE("fcntl(%d, F_SETFD, 0x%x)", f, arg);
+            if (arg & 1)
+                bit_set(f, table->cloexec);
+            else
+                bit_clear(f, table->cloexec);
+            return 0;
+
+        default:
+            return _EINVAL;
+    }
+}
diff --git a/fs/fdtable.h b/fs/fdtable.h
new file mode 100644
--- /dev/null
+++ b/fs/fdtable.h
@@ -0,0 +1,27 @@
+#ifndef FDTABLE_H
+#define FDTABLE_H
+#include <stdatomic.h>
+#include <stdbool.h>
+#include "util/bits.h"
+
+struct fdtable {
+    atomic_uint refcount;
+    unsigned size; // number of slots in files and of bits in cloexec
+    struct fd **files; // NULL marks a free slot
+    bits_t *cloexec; // bit f set: close fd f on exec
+};
+
+struct fdtable *fdtable_alloc(unsigned size);
+void fdtable_release(struct fdtable *table);
+int fdtable_resize(struct fdtable *table, unsigned size);
+struct fdtable *fdtable_copy(struct fdtable *table);
+void fdtable_free(struct fdtable *table);
+
+struct fd *f_get(fd_t f);
+bool f_is_cloexec(fd_t f);
+void f_put(fd_t f, struct fd *fd);
+// steals a reference to the fd, gives it to the table on success and destroys it on error
+fd_t f_install(struct fd *fd);
+int f_close(fd_t f);
+
+#endif
diff --git a/fs/poll.h b/fs/poll.h
new file mode 100644
index 0000000000..f06ad2848d
--- /dev/null
+++ b/fs/poll.h
@@ -0,0 +1,39 @@
+#ifndef FS_POLL_H
+#define FS_POLL_H
+#include "kernel/fs.h"
+
+struct poll {
+    struct list poll_fds;
+    struct list real_poll_fds;
+    int notify_pipe[2];
+    lock_t lock;
+};
+
+struct poll_fd {
+    // locked by containing struct poll
+    struct fd *fd;
+    struct list fds;
+    int types;
+
+    // locked by containing struct fd
+    struct poll *poll;
+    struct list polls;
+};
+
+#define POLL_READ 1
+#define POLL_WRITE 4
+struct poll_event {
+    struct fd *fd;
+    int types;
+};
+struct poll *poll_create(void);
+int poll_add_fd(struct poll *poll, struct fd *fd, int types);
+int poll_del_fd(struct poll *poll, struct fd *fd);
+// please do not call this while holding any locks you would acquire in your poll operation
+void poll_wake(struct fd *fd);
+int poll_wait(struct poll *poll, struct poll_event *event, int timeout);
+// does not lock the poll because lock ordering, you must ensure no other
+// thread will add or remove fds from this poll
+void poll_destroy(struct poll *poll);
+
+#endif
diff --git a/kernel/exec.c b/kernel/exec.c
--- a/kernel/exec.c
+++ b/kernel/exec.c
@@ -8,6 +8,7 @@
 #include "misc.h"
 #include "kernel/calls.h"
 #include "kernel/errno.h"
+#include "fs/fdtable.h"
 #include "kernel/elf.h"
 #include "libvdso.so.h"
 
@@ -469,16 +470,18 @@ int sys_execve(const char *file, char *const argv[], char *const envp[]) {
         return PTR_ERR(fd);
 
     int err = elf_exec(fd, file, argv, envp);
-    if (err != _ENOEXEC) {
-        fd_close(fd);
-        return err;
-    }
-    err = shebang_exec(fd, file, argv, envp);
-    if (err != _ENOEXEC) {
-        fd_close(fd);
-        return err;
-    }
+    // only fall back to the shebang loader when the ELF loader rejects the file
+    if (err == _ENOEXEC)
+        err = shebang_exec(fd, file, argv, envp);
     fd_close(fd);
+    // a failed exec must report its error and leave the fd table alone
+    if (err < 0)
+        return err;
+
+    // the exec succeeded: drop every descriptor marked close-on-exec
+    for (fd_t f = 0; f < current->files->size; f++)
+        if (f_is_cloexec(f))
+            f_close(f);
     return 0;
 }
 
diff --git a/util/bits.h b/util/bits.h
new file mode 100644
--- /dev/null
+++ b/util/bits.h
@@ -0,0 +1,26 @@
+#ifndef BITS_H
+#define BITS_H
+#include <stdbool.h>
+#include <stddef.h>
+
+// A bitmap is an opaque byte buffer; bit i lives in byte i / 8 at position i % 8.
+typedef void bits_t;
+// bytes needed to hold `bits` bits (0 for an empty bitmap)
+#define BITS_SIZE(bits) (((bits) + 7) / 8)
+
+static inline bool bit_test(size_t i, const bits_t *data) {
+    const unsigned char *c = data;
+    return (c[i >> 3] & (1u << (i & 7))) != 0;
+}
+
+static inline void bit_set(size_t i, bits_t *data) {
+    unsigned char *c = data;
+    c[i >> 3] |= 1u << (i & 7);
+}
+
+static inline void bit_clear(size_t i, bits_t *data) {
+    unsigned char *c = data;
+    c[i >> 3] &= ~(1u << (i & 7));
+}
+
+#endif