Skip to content

Commit

Permalink
Merge branch 'vfs.file'
Browse files: browse the repository at this point in the history
Bring in the changes to the file infrastructure for this cycle. Mostly
cleanups and some performance tweaks.

* file: remove __receive_fd()
* file: stop exposing receive_fd_user()
* fs: replace f_rcuhead with f_task_work
* file: remove pointless wrapper
* file: s/close_fd_get_file()/file_close_fd()/g
* Improve __fget_files_rcu() code generation (and thus __fget_light())
* file: massage cleanup of files that failed to open

Signed-off-by: Christian Brauner <[email protected]>
  • Loading branch information
brauner committed Dec 21, 2023
2 parents 1bfc466 + 4e94ddf commit 2137e15
Show file tree
Hide file tree
Showing 16 changed files with 95 additions and 92 deletions.
2 changes: 1 addition & 1 deletion drivers/android/binder.c
Original file line number Diff line number Diff line change
Expand Up @@ -1921,7 +1921,7 @@ static void binder_deferred_fd_close(int fd)
if (!twcb)
return;
init_task_work(&twcb->twork, binder_do_fd_close);
twcb->file = close_fd_get_file(fd);
twcb->file = file_close_fd(fd);
if (twcb->file) {
// pin it until binder_do_fd_close(); see comments there
get_file(twcb->file);
Expand Down
2 changes: 1 addition & 1 deletion drivers/vdpa/vdpa_user/vduse_dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1157,7 +1157,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
fput(f);
break;
}
ret = receive_fd(f, perm_to_file_flags(entry.perm));
ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
fput(f);
break;
}
Expand Down
97 changes: 53 additions & 44 deletions fs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -629,19 +629,23 @@ void fd_install(unsigned int fd, struct file *file)
EXPORT_SYMBOL(fd_install);

/**
* pick_file - return file associatd with fd
* file_close_fd_locked - return file associated with fd
* @files: file struct to retrieve file from
* @fd: file descriptor to retrieve file for
*
* Doesn't take a separate reference count.
*
* Context: files_lock must be held.
*
* Returns: The file associated with @fd (NULL if @fd is not open)
*/
static struct file *pick_file(struct files_struct *files, unsigned fd)
struct file *file_close_fd_locked(struct files_struct *files, unsigned fd)
{
struct fdtable *fdt = files_fdtable(files);
struct file *file;

lockdep_assert_held(&files->file_lock);

if (fd >= fdt->max_fds)
return NULL;

Expand All @@ -660,7 +664,7 @@ int close_fd(unsigned fd)
struct file *file;

spin_lock(&files->file_lock);
file = pick_file(files, fd);
file = file_close_fd_locked(files, fd);
spin_unlock(&files->file_lock);
if (!file)
return -EBADF;
Expand Down Expand Up @@ -707,7 +711,7 @@ static inline void __range_close(struct files_struct *files, unsigned int fd,
max_fd = min(max_fd, n);

for (; fd <= max_fd; fd++) {
file = pick_file(files, fd);
file = file_close_fd_locked(files, fd);
if (file) {
spin_unlock(&files->file_lock);
filp_close(file, files);
Expand Down Expand Up @@ -795,26 +799,21 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
return 0;
}

/*
* See close_fd_get_file() below, this variant assumes current->files->file_lock
* is held.
*/
struct file *__close_fd_get_file(unsigned int fd)
{
return pick_file(current->files, fd);
}

/*
* variant of close_fd that gets a ref on the file for later fput.
* The caller must ensure that filp_close() called on the file.
/**
* file_close_fd - return file associated with fd
* @fd: file descriptor to retrieve file for
*
* Doesn't take a separate reference count.
*
* Returns: The file associated with @fd (NULL if @fd is not open)
*/
struct file *close_fd_get_file(unsigned int fd)
struct file *file_close_fd(unsigned int fd)
{
struct files_struct *files = current->files;
struct file *file;

spin_lock(&files->file_lock);
file = pick_file(files, fd);
file = file_close_fd_locked(files, fd);
spin_unlock(&files->file_lock);

return file;
Expand Down Expand Up @@ -959,39 +958,54 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
struct file *file;
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
struct file __rcu **fdentry;
unsigned long nospec_mask;

if (unlikely(fd >= fdt->max_fds))
return NULL;

fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds);
/* Mask is a 0 for invalid fd's, ~0 for valid ones */
nospec_mask = array_index_mask_nospec(fd, fdt->max_fds);

/*
* Ok, we have a file pointer. However, because we do
* this all locklessly under RCU, we may be racing with
* that file being closed.
*
* Such a race can take two forms:
*
* (a) the file ref already went down to zero and the
* file hasn't been reused yet or the file count
* isn't zero but the file has already been reused.
* fdentry points to the 'fd' offset, or fdt->fd[0].
* Loading from fdt->fd[0] is always safe, because the
* array always exists.
*/
file = __get_file_rcu(fdentry);
fdentry = fdt->fd + (fd & nospec_mask);

/* Do the load, then mask any invalid result */
file = rcu_dereference_raw(*fdentry);
file = (void *)(nospec_mask & (unsigned long)file);
if (unlikely(!file))
return NULL;

if (unlikely(IS_ERR(file)))
/*
* Ok, we have a file pointer that was valid at
* some point, but it might have become stale since.
*
* We need to confirm it by incrementing the refcount
* and then check the lookup again.
*
* atomic_long_inc_not_zero() gives us a full memory
* barrier. We only really need an 'acquire' one to
* protect the loads below, but we don't have that.
*/
if (unlikely(!atomic_long_inc_not_zero(&file->f_count)))
continue;

/*
* Such a race can take two forms:
*
* (a) the file ref already went down to zero and the
* file hasn't been reused yet or the file count
* isn't zero but the file has already been reused.
*
* (b) the file table entry has changed under us.
* Note that we don't need to re-check the 'fdt->fd'
* pointer having changed, because it always goes
* hand-in-hand with 'fdt'.
*
* If so, we need to put our ref and try again.
*/
if (unlikely(rcu_dereference_raw(files->fdt) != fdt)) {
if (unlikely(file != rcu_dereference_raw(*fdentry)) ||
unlikely(rcu_dereference_raw(files->fdt) != fdt)) {
fput(file);
continue;
}
Expand Down Expand Up @@ -1128,13 +1142,13 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
* atomic_read_acquire() pairs with atomic_dec_and_test() in
* put_files_struct().
*/
if (atomic_read_acquire(&files->count) == 1) {
if (likely(atomic_read_acquire(&files->count) == 1)) {
file = files_lookup_fd_raw(files, fd);
if (!file || unlikely(file->f_mode & mask))
return 0;
return (unsigned long)file;
} else {
file = __fget(fd, mask);
file = __fget_files(files, fd, mask);
if (!file)
return 0;
return FDPUT_FPUT | (unsigned long)file;
Expand Down Expand Up @@ -1282,7 +1296,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
}

/**
* __receive_fd() - Install received file into file descriptor table
* receive_fd() - Install received file into file descriptor table
* @file: struct file that was received from another process
* @ufd: __user pointer to write new fd number to
* @o_flags: the O_* flags to apply to the new fd entry
Expand All @@ -1296,7 +1310,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
*
* Returns the newly installed fd, or a negative value on error.
*/
int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags)
int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags)
{
int new_fd;
int error;
Expand All @@ -1321,6 +1335,7 @@ int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags)
__receive_sock(file);
return new_fd;
}
EXPORT_SYMBOL_GPL(receive_fd);

int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
{
Expand All @@ -1336,12 +1351,6 @@ int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
return new_fd;
}

int receive_fd(struct file *file, unsigned int o_flags)
{
return __receive_fd(file, NULL, o_flags);
}
EXPORT_SYMBOL_GPL(receive_fd);

static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
{
int err = -EBADF;
Expand Down
22 changes: 7 additions & 15 deletions fs/file_table.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,6 @@ static inline void file_free(struct file *f)
}
}

void release_empty_file(struct file *f)
{
WARN_ON_ONCE(f->f_mode & (FMODE_BACKING | FMODE_OPENED));
if (atomic_long_dec_and_test(&f->f_count)) {
security_file_free(f);
put_cred(f->f_cred);
if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
percpu_counter_dec(&nr_files);
kmem_cache_free(filp_cachep, f);
}
}

/*
* Return the total number of open files in the system
*/
Expand Down Expand Up @@ -419,7 +407,7 @@ static void delayed_fput(struct work_struct *unused)

static void ____fput(struct callback_head *work)
{
__fput(container_of(work, struct file, f_rcuhead));
__fput(container_of(work, struct file, f_task_work));
}

/*
Expand All @@ -445,9 +433,13 @@ void fput(struct file *file)
if (atomic_long_dec_and_test(&file->f_count)) {
struct task_struct *task = current;

if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) {
file_free(file);
return;
}
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
init_task_work(&file->f_rcuhead, ____fput);
if (!task_work_add(task, &file->f_rcuhead, TWA_RESUME))
init_task_work(&file->f_task_work, ____fput);
if (!task_work_add(task, &file->f_task_work, TWA_RESUME))
return;
/*
* After this task has run exit_task_work(),
Expand Down
3 changes: 1 addition & 2 deletions fs/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
struct file *alloc_empty_file(int flags, const struct cred *cred);
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
void release_empty_file(struct file *f);

static inline void file_put_write_access(struct file *file)
{
Expand Down Expand Up @@ -180,7 +179,7 @@ extern struct file *do_file_open_root(const struct path *,
const char *, const struct open_flags *);
extern struct open_how build_open_how(int flags, umode_t mode);
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
extern struct file *__close_fd_get_file(unsigned int fd);
struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);

long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
int chmod_common(const struct path *path, umode_t mode);
Expand Down
5 changes: 1 addition & 4 deletions fs/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -3785,10 +3785,7 @@ static struct file *path_openat(struct nameidata *nd,
WARN_ON(1);
error = -EINVAL;
}
if (unlikely(file->f_mode & FMODE_OPENED))
fput(file);
else
release_empty_file(file);
fput(file);
if (error == -EOPENSTALE) {
if (flags & LOOKUP_RCU)
error = -ECHILD;
Expand Down
2 changes: 1 addition & 1 deletion fs/open.c
Original file line number Diff line number Diff line change
Expand Up @@ -1578,7 +1578,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
int retval;
struct file *file;

file = close_fd_get_file(fd);
file = file_close_fd(fd);
if (!file)
return -EBADF;

Expand Down
19 changes: 12 additions & 7 deletions include/linux/fdtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,17 @@ struct dentry;
static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd)
{
struct fdtable *fdt = rcu_dereference_raw(files->fdt);

if (fd < fdt->max_fds) {
fd = array_index_nospec(fd, fdt->max_fds);
return rcu_dereference_raw(fdt->fd[fd]);
}
return NULL;
unsigned long mask = array_index_mask_nospec(fd, fdt->max_fds);
struct file *needs_masking;

/*
* 'mask' is zero for an out-of-bounds fd, all ones for ok.
* 'fd&mask' is 'fd' for ok, or 0 for out of bounds.
*
* Accessing fdt->fd[0] is ok, but needs masking of the result.
*/
needs_masking = rcu_dereference_raw(fdt->fd[fd&mask]);
return (struct file *)(mask & (unsigned long)needs_masking);
}

static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd)
Expand All @@ -114,7 +119,7 @@ int iterate_fd(struct files_struct *, unsigned,

extern int close_fd(unsigned int fd);
extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
extern struct file *close_fd_get_file(unsigned int fd);
extern struct file *file_close_fd(unsigned int fd);
extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
struct files_struct **new_fdp);

Expand Down
12 changes: 1 addition & 11 deletions include/linux/file.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,18 +96,8 @@ DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T),

extern void fd_install(unsigned int fd, struct file *file);

extern int __receive_fd(struct file *file, int __user *ufd,
unsigned int o_flags);
int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags);

extern int receive_fd(struct file *file, unsigned int o_flags);

static inline int receive_fd_user(struct file *file, int __user *ufd,
unsigned int o_flags)
{
if (ufd == NULL)
return -EFAULT;
return __receive_fd(file, ufd, o_flags);
}
int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags);

extern void flush_delayed_fput(void);
Expand Down
4 changes: 3 additions & 1 deletion include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -991,8 +991,10 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
*/
struct file {
union {
/* fput() uses task work when closing and freeing file (default). */
struct callback_head f_task_work;
/* fput() must use workqueue (most kernel threads). */
struct llist_node f_llist;
struct rcu_head f_rcuhead;
unsigned int f_iocb_flags;
};

Expand Down
9 changes: 9 additions & 0 deletions include/net/scm.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <linux/limits.h>
#include <linux/net.h>
#include <linux/cred.h>
#include <linux/file.h>
#include <linux/security.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>
Expand Down Expand Up @@ -208,5 +209,13 @@ static inline void scm_recv_unix(struct socket *sock, struct msghdr *msg,
scm_destroy_cred(scm);
}

/**
 * scm_recv_one_fd - pass one received file to receive_fd(), rejecting a NULL @ufd
 * @f: file handed on to receive_fd()
 * @ufd: __user pointer forwarded to receive_fd(); a NULL value is refused here
 * @flags: flags forwarded unchanged to receive_fd()
 *
 * Returns: -EFAULT when @ufd is NULL, otherwise the return value of
 * receive_fd().
 */
static inline int scm_recv_one_fd(struct file *f, int __user *ufd,
				  unsigned int flags)
{
	/* receive_fd() is only reached with a non-NULL user pointer. */
	return ufd ? receive_fd(f, ufd, flags) : -EFAULT;
}

#endif /* __LINUX_NET_SCM_H */

Loading

0 comments on commit 2137e15

Please sign in to comment.