Skip to content

Commit

Permalink
[PATCH] fdtable: Make fdarray and fdsets equal in size
Browse files Browse the repository at this point in the history
Currently, each fdtable supports three dynamically-sized arrays of data: the
fdarray and two fdsets.  The code allows the number of fds supported by the
fdarray (fdtable->max_fds) to differ from the number of fds supported by each
of the fdsets (fdtable->max_fdset).

In practice, it is wasteful for these two sizes to differ: whenever we hit a
limit on the smaller-capacity structure, we will reallocate the entire fdtable
and all the dynamic arrays within it, so any delta in the memory used by the
larger-capacity structure will never be touched at all.

Rather than hogging this excess, we shouldn't even allocate it in the first
place, and keep the capacities of the fdarray and the fdsets equal.  This
patch removes fdtable->max_fdset.  As an added bonus, most of the supporting
code becomes simpler.

Signed-off-by: Vadim Lobanov <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Al Viro <[email protected]>
Cc: Dipankar Sarma <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Vadim Lobanov authored and Linus Torvalds committed Dec 10, 2006
1 parent f3d19c9 commit bbea9f6
Show file tree
Hide file tree
Showing 13 changed files with 48 additions and 81 deletions.
6 changes: 3 additions & 3 deletions arch/alpha/kernel/osf_sys.c
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
long timeout;
int ret = -EINVAL;
struct fdtable *fdt;
int max_fdset;
int max_fds;

timeout = MAX_SCHEDULE_TIMEOUT;
if (tvp) {
Expand All @@ -1003,9 +1003,9 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,

rcu_read_lock();
fdt = files_fdtable(current->files);
max_fdset = fdt->max_fdset;
max_fds = fdt->max_fds;
rcu_read_unlock();
if (n < 0 || n > max_fdset)
if (n < 0 || n > max_fds)
goto out_nofds;

/*
Expand Down
2 changes: 1 addition & 1 deletion arch/mips/kernel/kspd.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ static void sp_cleanup(void)
for (;;) {
unsigned long set;
i = j * __NFDBITS;
if (i >= fdt->max_fdset || i >= fdt->max_fds)
if (i >= fdt->max_fds)
break;
set = fdt->open_fds->fds_bits[j++];
while (set) {
Expand Down
10 changes: 5 additions & 5 deletions fs/compat.c
Original file line number Diff line number Diff line change
Expand Up @@ -1679,19 +1679,19 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
{
fd_set_bits fds;
char *bits;
int size, max_fdset, ret = -EINVAL;
int size, max_fds, ret = -EINVAL;
struct fdtable *fdt;

if (n < 0)
goto out_nofds;

/* max_fdset can increase, so grab it once to avoid race */
/* max_fds can increase, so grab it once to avoid race */
rcu_read_lock();
fdt = files_fdtable(current->files);
max_fdset = fdt->max_fdset;
max_fds = fdt->max_fds;
rcu_read_unlock();
if (n > max_fdset)
n = max_fdset;
if (n > max_fds)
n = max_fds;

/*
* We need 6 bitmaps (in/out/ex for both incoming and outgoing),
Expand Down
2 changes: 1 addition & 1 deletion fs/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,7 @@ static void flush_old_files(struct files_struct * files)
j++;
i = j * __NFDBITS;
fdt = files_fdtable(files);
if (i >= fdt->max_fds || i >= fdt->max_fdset)
if (i >= fdt->max_fds)
break;
set = fdt->close_on_exec->fds_bits[j];
if (!set)
Expand Down
5 changes: 2 additions & 3 deletions fs/fcntl.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,9 @@ static int locate_fd(struct files_struct *files,
start = files->next_fd;

newfd = start;
if (start < fdt->max_fdset) {
if (start < fdt->max_fds)
newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
fdt->max_fdset, start);
}
fdt->max_fds, start);

error = -EMFILE;
if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
Expand Down
56 changes: 22 additions & 34 deletions fs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ void free_fd_array(struct file **array, int num)

static void __free_fdtable(struct fdtable *fdt)
{
free_fdset(fdt->open_fds, fdt->max_fdset);
free_fdset(fdt->close_on_exec, fdt->max_fdset);
free_fdset(fdt->open_fds, fdt->max_fds);
free_fdset(fdt->close_on_exec, fdt->max_fds);
free_fd_array(fdt->fd, fdt->max_fds);
kfree(fdt);
}
Expand Down Expand Up @@ -98,7 +98,7 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
struct fdtable_defer *fddef;

BUG_ON(!fdt);
fdset_size = fdt->max_fdset / 8;
fdset_size = fdt->max_fds / 8;
fdarray_size = fdt->max_fds * sizeof(struct file *);

if (fdt->free_files) {
Expand All @@ -110,13 +110,11 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
kmem_cache_free(files_cachep, fdt->free_files);
return;
}
if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE &&
fdt->max_fds <= NR_OPEN_DEFAULT) {
if (fdt->max_fds <= NR_OPEN_DEFAULT)
/*
* The fdtable was embedded
*/
return;
}
if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
kfree(fdt->open_fds);
kfree(fdt->close_on_exec);
Expand All @@ -136,9 +134,7 @@ static void free_fdtable_rcu(struct rcu_head *rcu)

void free_fdtable(struct fdtable *fdt)
{
if (fdt->free_files ||
fdt->max_fdset > EMBEDDED_FD_SET_SIZE ||
fdt->max_fds > NR_OPEN_DEFAULT)
if (fdt->free_files || fdt->max_fds > NR_OPEN_DEFAULT)
call_rcu(&fdt->rcu, free_fdtable_rcu);
}

Expand All @@ -151,22 +147,21 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
int i;
int count;

BUG_ON(nfdt->max_fdset < fdt->max_fdset);
BUG_ON(nfdt->max_fds < fdt->max_fds);
/* Copy the existing tables and install the new pointers */

i = fdt->max_fdset / (sizeof(unsigned long) * 8);
count = (nfdt->max_fdset - fdt->max_fdset) / 8;
i = fdt->max_fds / (sizeof(unsigned long) * 8);
count = (nfdt->max_fds - fdt->max_fds) / 8;

/*
* Don't copy the entire array if the current fdset is
* not yet initialised.
*/
if (i) {
memcpy (nfdt->open_fds, fdt->open_fds,
fdt->max_fdset/8);
fdt->max_fds/8);
memcpy (nfdt->close_on_exec, fdt->close_on_exec,
fdt->max_fdset/8);
fdt->max_fds/8);
memset (&nfdt->open_fds->fds_bits[i], 0, count);
memset (&nfdt->close_on_exec->fds_bits[i], 0, count);
}
Expand Down Expand Up @@ -201,7 +196,7 @@ fd_set * alloc_fdset(int num)

void free_fdset(fd_set *array, int num)
{
if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */
if (num <= NR_OPEN_DEFAULT) /* Don't free an embedded fdset */
return;
else if (num <= 8 * PAGE_SIZE)
kfree(array);
Expand All @@ -220,18 +215,6 @@ static struct fdtable *alloc_fdtable(int nr)
if (!fdt)
goto out;

nfds = max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nr + 1));
if (nfds > NR_OPEN)
nfds = NR_OPEN;

new_openset = alloc_fdset(nfds);
new_execset = alloc_fdset(nfds);
if (!new_openset || !new_execset)
goto out;
fdt->open_fds = new_openset;
fdt->close_on_exec = new_execset;
fdt->max_fdset = nfds;

nfds = NR_OPEN_DEFAULT;
/*
* Expand to the max in easy steps, and keep expanding it until
Expand All @@ -251,15 +234,21 @@ static struct fdtable *alloc_fdtable(int nr)
nfds = NR_OPEN;
}
} while (nfds <= nr);

new_openset = alloc_fdset(nfds);
new_execset = alloc_fdset(nfds);
if (!new_openset || !new_execset)
goto out;
fdt->open_fds = new_openset;
fdt->close_on_exec = new_execset;

new_fds = alloc_fd_array(nfds);
if (!new_fds)
goto out2;
goto out;
fdt->fd = new_fds;
fdt->max_fds = nfds;
fdt->free_files = NULL;
return fdt;
out2:
nfds = fdt->max_fdset;
out:
free_fdset(new_openset, nfds);
free_fdset(new_execset, nfds);
Expand Down Expand Up @@ -290,7 +279,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
* we dropped the lock
*/
cur_fdt = files_fdtable(files);
if (nr >= cur_fdt->max_fds || nr >= cur_fdt->max_fdset) {
if (nr >= cur_fdt->max_fds) {
/* Continue as planned */
copy_fdtable(new_fdt, cur_fdt);
rcu_assign_pointer(files->fdt, new_fdt);
Expand All @@ -316,11 +305,10 @@ int expand_files(struct files_struct *files, int nr)

fdt = files_fdtable(files);
/* Do we need to expand? */
if (nr < fdt->max_fdset && nr < fdt->max_fds)
if (nr < fdt->max_fds)
return 0;
/* Can we expand? */
if (fdt->max_fdset >= NR_OPEN || fdt->max_fds >= NR_OPEN ||
nr >= NR_OPEN)
if (nr >= NR_OPEN)
return -EMFILE;

/* All good, so we try */
Expand Down
3 changes: 1 addition & 2 deletions fs/open.c
Original file line number Diff line number Diff line change
Expand Up @@ -864,8 +864,7 @@ int get_unused_fd(void)

repeat:
fdt = files_fdtable(files);
fd = find_next_zero_bit(fdt->open_fds->fds_bits,
fdt->max_fdset,
fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
files->next_fd);

/*
Expand Down
10 changes: 5 additions & 5 deletions fs/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
{
fd_set_bits fds;
void *bits;
int ret, max_fdset;
int ret, max_fds;
unsigned int size;
struct fdtable *fdt;
/* Allocate small arguments on the stack to save memory and be faster */
Expand All @@ -321,13 +321,13 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
if (n < 0)
goto out_nofds;

/* max_fdset can increase, so grab it once to avoid race */
/* max_fds can increase, so grab it once to avoid race */
rcu_read_lock();
fdt = files_fdtable(current->files);
max_fdset = fdt->max_fdset;
max_fds = fdt->max_fds;
rcu_read_unlock();
if (n > max_fdset)
n = max_fdset;
if (n > max_fds)
n = max_fds;

/*
* We need 6 bitmaps (in/out/ex for both incoming and outgoing),
Expand Down
6 changes: 0 additions & 6 deletions include/linux/file.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,8 @@ struct embedded_fd_set {
unsigned long fds_bits[1];
};

/*
* More than this number of fds: we use a separately allocated fd_set
*/
#define EMBEDDED_FD_SET_SIZE (BITS_PER_BYTE * sizeof(struct embedded_fd_set))

struct fdtable {
unsigned int max_fds;
int max_fdset;
struct file ** fd; /* current fd array */
fd_set *close_on_exec;
fd_set *open_fds;
Expand Down
1 change: 0 additions & 1 deletion include/linux/init_task.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#define INIT_FDTABLE \
{ \
.max_fds = NR_OPEN_DEFAULT, \
.max_fdset = EMBEDDED_FD_SET_SIZE, \
.fd = &init_files.fd_array[0], \
.close_on_exec = (fd_set *)&init_files.close_on_exec_init, \
.open_fds = (fd_set *)&init_files.open_fds_init, \
Expand Down
2 changes: 1 addition & 1 deletion kernel/exit.c
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ static void close_files(struct files_struct * files)
for (;;) {
unsigned long set;
i = j * __NFDBITS;
if (i >= fdt->max_fdset || i >= fdt->max_fds)
if (i >= fdt->max_fds)
break;
set = fdt->open_fds->fds_bits[j++];
while (set) {
Expand Down
24 changes: 6 additions & 18 deletions kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)

static int count_open_files(struct fdtable *fdt)
{
int size = fdt->max_fdset;
int size = fdt->max_fds;
int i;

/* Find the last open fd */
Expand All @@ -641,7 +641,6 @@ static struct files_struct *alloc_files(void)
newf->next_fd = 0;
fdt = &newf->fdtab;
fdt->max_fds = NR_OPEN_DEFAULT;
fdt->max_fdset = EMBEDDED_FD_SET_SIZE;
fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
fdt->open_fds = (fd_set *)&newf->open_fds_init;
fdt->fd = &newf->fd_array[0];
Expand All @@ -662,7 +661,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
{
struct files_struct *newf;
struct file **old_fds, **new_fds;
int open_files, size, i, expand;
int open_files, size, i;
struct fdtable *old_fdt, *new_fdt;

*errorp = -ENOMEM;
Expand All @@ -673,25 +672,14 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
spin_lock(&oldf->file_lock);
old_fdt = files_fdtable(oldf);
new_fdt = files_fdtable(newf);
size = old_fdt->max_fdset;
open_files = count_open_files(old_fdt);
expand = 0;

/*
* Check whether we need to allocate a larger fd array or fd set.
* Note: we're not a clone task, so the open count won't change.
* Check whether we need to allocate a larger fd array and fd set.
* Note: we're not a clone task, so the open count won't change.
*/
if (open_files > new_fdt->max_fdset) {
new_fdt->max_fdset = 0;
expand = 1;
}
if (open_files > new_fdt->max_fds) {
new_fdt->max_fds = 0;
expand = 1;
}

/* if the old fdset gets grown now, we'll only copy up to "size" fds */
if (expand) {
spin_unlock(&oldf->file_lock);
spin_lock(&newf->file_lock);
*errorp = expand_files(newf, open_files-1);
Expand Down Expand Up @@ -739,8 +727,8 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
/* This is long word aligned thus could use a optimized version */
memset(new_fds, 0, size);

if (new_fdt->max_fdset > open_files) {
int left = (new_fdt->max_fdset-open_files)/8;
if (new_fdt->max_fds > open_files) {
int left = (new_fdt->max_fds-open_files)/8;
int start = open_files / (8 * sizeof(unsigned long));

memset(&new_fdt->open_fds->fds_bits[start], 0, left);
Expand Down
2 changes: 1 addition & 1 deletion security/selinux/hooks.c
Original file line number Diff line number Diff line change
Expand Up @@ -1734,7 +1734,7 @@ static inline void flush_unauthorized_files(struct files_struct * files)
j++;
i = j * __NFDBITS;
fdt = files_fdtable(files);
if (i >= fdt->max_fds || i >= fdt->max_fdset)
if (i >= fdt->max_fds)
break;
set = fdt->open_fds->fds_bits[j];
if (!set)
Expand Down

0 comments on commit bbea9f6

Please sign in to comment.