Skip to content

Commit

Permalink
proc: faster open/read/close with "permanent" files
Browse files Browse the repository at this point in the history
Now that "struct proc_ops" exist we can start putting there stuff which
could not fly with VFS "struct file_operations"...

Most of fs/proc/inode.c file is dedicated to make open/read/.../close
reliable in the event of disappearing /proc entries which usually happens
if module is getting removed.  Files like /proc/cpuinfo which never
disappear simply do not need such protection.

Save 2 atomic ops, 1 allocation, 1 free per open/read/close sequence for such
"permanent" files.

Enable "permanent" flag for

	/proc/cpuinfo
	/proc/kmsg
	/proc/modules
	/proc/slabinfo
	/proc/stat
	/proc/sysvipc/*
	/proc/swaps

More will come once I figure out foolproof way to prevent out module
authors from marking their stuff "permanent" for performance reasons
when it is not.

This should help with scalability: benchmark is "read /proc/cpuinfo R times
by N threads scattered over the system".

	N	R	t, s (before)	t, s (after)
	-----------------------------------------------------
	64	4096	1.582458	1.530502	-3.2%
	256	4096	6.371926	6.125168	-3.9%
	1024	4096	25.64888	24.47528	-4.6%

Benchmark source:

#include <chrono>
#include <iostream>
#include <thread>
#include <vector>

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

const int NR_CPUS = sysconf(_SC_NPROCESSORS_ONLN);
int N;
const char *filename;
int R;

int xxx = 0;

int glue(int n)
{
	cpu_set_t m;
	CPU_ZERO(&m);
	CPU_SET(n, &m);
	return sched_setaffinity(0, sizeof(cpu_set_t), &m);
}

void f(int n)
{
	glue(n % NR_CPUS);

	while (*(volatile int *)&xxx == 0) {
	}

	for (int i = 0; i < R; i++) {
		int fd = open(filename, O_RDONLY);
		char buf[4096];
		ssize_t rv = read(fd, buf, sizeof(buf));
		asm volatile ("" :: "g" (rv));
		close(fd);
	}
}

int main(int argc, char *argv[])
{
	if (argc < 4) {
		std::cerr << "usage: " << argv[0] << ' ' << "N /proc/filename R
";
		return 1;
	}

	N = atoi(argv[1]);
	filename = argv[2];
	R = atoi(argv[3]);

	for (int i = 0; i < NR_CPUS; i++) {
		if (glue(i) == 0)
			break;
	}

	std::vector<std::thread> T;
	T.reserve(N);
	for (int i = 0; i < N; i++) {
		T.emplace_back(f, i);
	}

	auto t0 = std::chrono::system_clock::now();
	{
		*(volatile int *)&xxx = 1;
		for (auto& t: T) {
			t.join();
		}
	}
	auto t1 = std::chrono::system_clock::now();
	std::chrono::duration<double> dt = t1 - t0;
	std::cout << dt.count() << '
';

	return 0;
}

P.S.:
Explicit randomization marker is added because adding non-function pointer
will silently disable structure layout randomization.

[[email protected]: coding style fixes]
Reported-by: kbuild test robot <[email protected]>
Reported-by: Dan Carpenter <[email protected]>
Signed-off-by: Alexey Dobriyan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Cc: Al Viro <[email protected]>
Cc: Joe Perches <[email protected]>
Link: http://lkml.kernel.org/r/20200222201539.GA22576@avx2
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Alexey Dobriyan authored and torvalds committed Apr 7, 2020
1 parent 904f394 commit d919b33
Show file tree
Hide file tree
Showing 11 changed files with 194 additions and 54 deletions.
1 change: 1 addition & 0 deletions fs/proc/cpuinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
}

static const struct proc_ops cpuinfo_proc_ops = {
.proc_flags = PROC_ENTRY_PERMANENT,
.proc_open = cpuinfo_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
Expand Down
31 changes: 28 additions & 3 deletions fs/proc/generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,12 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
return p;
}

static inline void pde_set_flags(struct proc_dir_entry *pde)
{
if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT)
pde->flags |= PROC_ENTRY_PERMANENT;
}

struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
struct proc_dir_entry *parent,
const struct proc_ops *proc_ops, void *data)
Expand All @@ -541,6 +547,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
if (!p)
return NULL;
p->proc_ops = proc_ops;
pde_set_flags(p);
return proc_register(parent, p);
}
EXPORT_SYMBOL(proc_create_data);
Expand Down Expand Up @@ -572,6 +579,7 @@ static int proc_seq_release(struct inode *inode, struct file *file)
}

static const struct proc_ops proc_seq_ops = {
/* not permanent -- can call into arbitrary seq_operations */
.proc_open = proc_seq_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
Expand Down Expand Up @@ -602,6 +610,7 @@ static int proc_single_open(struct inode *inode, struct file *file)
}

static const struct proc_ops proc_single_ops = {
/* not permanent -- can call into arbitrary ->single_show */
.proc_open = proc_single_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
Expand Down Expand Up @@ -662,9 +671,13 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)

de = pde_subdir_find(parent, fn, len);
if (de) {
rb_erase(&de->subdir_node, &parent->subdir);
if (S_ISDIR(de->mode)) {
parent->nlink--;
if (unlikely(pde_is_permanent(de))) {
WARN(1, "removing permanent /proc entry '%s'", de->name);
de = NULL;
} else {
rb_erase(&de->subdir_node, &parent->subdir);
if (S_ISDIR(de->mode))
parent->nlink--;
}
}
write_unlock(&proc_subdir_lock);
Expand Down Expand Up @@ -700,12 +713,24 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
write_unlock(&proc_subdir_lock);
return -ENOENT;
}
if (unlikely(pde_is_permanent(root))) {
write_unlock(&proc_subdir_lock);
WARN(1, "removing permanent /proc entry '%s/%s'",
root->parent->name, root->name);
return -EINVAL;
}
rb_erase(&root->subdir_node, &parent->subdir);

de = root;
while (1) {
next = pde_subdir_first(de);
if (next) {
if (unlikely(pde_is_permanent(root))) {
write_unlock(&proc_subdir_lock);
WARN(1, "removing permanent /proc entry '%s/%s'",
next->parent->name, next->name);
return -EINVAL;
}
rb_erase(&next->subdir_node, &de->subdir);
de = next;
continue;
Expand Down
187 changes: 137 additions & 50 deletions fs/proc/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -259,135 +259,204 @@ void proc_entry_rundown(struct proc_dir_entry *de)
spin_unlock(&de->pde_unload_lock);
}

static loff_t pde_lseek(struct proc_dir_entry *pde, struct file *file, loff_t offset, int whence)
{
typeof_member(struct proc_ops, proc_lseek) lseek;

lseek = pde->proc_ops->proc_lseek;
if (!lseek)
lseek = default_llseek;
return lseek(file, offset, whence);
}

static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
loff_t rv = -EINVAL;
if (use_pde(pde)) {
typeof_member(struct proc_ops, proc_lseek) lseek;

lseek = pde->proc_ops->proc_lseek;
if (!lseek)
lseek = default_llseek;
rv = lseek(file, offset, whence);
if (pde_is_permanent(pde)) {
return pde_lseek(pde, file, offset, whence);
} else if (use_pde(pde)) {
rv = pde_lseek(pde, file, offset, whence);
unuse_pde(pde);
}
return rv;
}

static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
typeof_member(struct proc_ops, proc_read) read;

read = pde->proc_ops->proc_read;
if (read)
return read(file, buf, count, ppos);
return -EIO;
}

static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
ssize_t rv = -EIO;
if (use_pde(pde)) {
typeof_member(struct proc_ops, proc_read) read;

read = pde->proc_ops->proc_read;
if (read)
rv = read(file, buf, count, ppos);
if (pde_is_permanent(pde)) {
return pde_read(pde, file, buf, count, ppos);
} else if (use_pde(pde)) {
rv = pde_read(pde, file, buf, count, ppos);
unuse_pde(pde);
}
return rv;
}

static ssize_t pde_write(struct proc_dir_entry *pde, struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
typeof_member(struct proc_ops, proc_write) write;

write = pde->proc_ops->proc_write;
if (write)
return write(file, buf, count, ppos);
return -EIO;
}

static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
ssize_t rv = -EIO;
if (use_pde(pde)) {
typeof_member(struct proc_ops, proc_write) write;

write = pde->proc_ops->proc_write;
if (write)
rv = write(file, buf, count, ppos);
if (pde_is_permanent(pde)) {
return pde_write(pde, file, buf, count, ppos);
} else if (use_pde(pde)) {
rv = pde_write(pde, file, buf, count, ppos);
unuse_pde(pde);
}
return rv;
}

static __poll_t pde_poll(struct proc_dir_entry *pde, struct file *file, struct poll_table_struct *pts)
{
typeof_member(struct proc_ops, proc_poll) poll;

poll = pde->proc_ops->proc_poll;
if (poll)
return poll(file, pts);
return DEFAULT_POLLMASK;
}

static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
__poll_t rv = DEFAULT_POLLMASK;
if (use_pde(pde)) {
typeof_member(struct proc_ops, proc_poll) poll;

poll = pde->proc_ops->proc_poll;
if (poll)
rv = poll(file, pts);
if (pde_is_permanent(pde)) {
return pde_poll(pde, file, pts);
} else if (use_pde(pde)) {
rv = pde_poll(pde, file, pts);
unuse_pde(pde);
}
return rv;
}

static long pde_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg)
{
typeof_member(struct proc_ops, proc_ioctl) ioctl;

ioctl = pde->proc_ops->proc_ioctl;
if (ioctl)
return ioctl(file, cmd, arg);
return -ENOTTY;
}

static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
long rv = -ENOTTY;
if (use_pde(pde)) {
typeof_member(struct proc_ops, proc_ioctl) ioctl;

ioctl = pde->proc_ops->proc_ioctl;
if (ioctl)
rv = ioctl(file, cmd, arg);
if (pde_is_permanent(pde)) {
return pde_ioctl(pde, file, cmd, arg);
} else if (use_pde(pde)) {
rv = pde_ioctl(pde, file, cmd, arg);
unuse_pde(pde);
}
return rv;
}

#ifdef CONFIG_COMPAT
static long pde_compat_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg)
{
typeof_member(struct proc_ops, proc_compat_ioctl) compat_ioctl;

compat_ioctl = pde->proc_ops->proc_compat_ioctl;
if (compat_ioctl)
return compat_ioctl(file, cmd, arg);
return -ENOTTY;
}

static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
long rv = -ENOTTY;
if (use_pde(pde)) {
typeof_member(struct proc_ops, proc_compat_ioctl) compat_ioctl;

compat_ioctl = pde->proc_ops->proc_compat_ioctl;
if (compat_ioctl)
rv = compat_ioctl(file, cmd, arg);
if (pde_is_permanent(pde)) {
return pde_compat_ioctl(pde, file, cmd, arg);
} else if (use_pde(pde)) {
rv = pde_compat_ioctl(pde, file, cmd, arg);
unuse_pde(pde);
}
return rv;
}
#endif

static int pde_mmap(struct proc_dir_entry *pde, struct file *file, struct vm_area_struct *vma)
{
typeof_member(struct proc_ops, proc_mmap) mmap;

mmap = pde->proc_ops->proc_mmap;
if (mmap)
return mmap(file, vma);
return -EIO;
}

static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
int rv = -EIO;
if (use_pde(pde)) {
typeof_member(struct proc_ops, proc_mmap) mmap;

mmap = pde->proc_ops->proc_mmap;
if (mmap)
rv = mmap(file, vma);
if (pde_is_permanent(pde)) {
return pde_mmap(pde, file, vma);
} else if (use_pde(pde)) {
rv = pde_mmap(pde, file, vma);
unuse_pde(pde);
}
return rv;
}

static unsigned long
proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
pde_get_unmapped_area(struct proc_dir_entry *pde, struct file *file, unsigned long orig_addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
unsigned long rv = -EIO;

if (use_pde(pde)) {
typeof_member(struct proc_ops, proc_get_unmapped_area) get_area;
typeof_member(struct proc_ops, proc_get_unmapped_area) get_area;

get_area = pde->proc_ops->proc_get_unmapped_area;
get_area = pde->proc_ops->proc_get_unmapped_area;
#ifdef CONFIG_MMU
if (!get_area)
get_area = current->mm->get_unmapped_area;
if (!get_area)
get_area = current->mm->get_unmapped_area;
#endif
if (get_area)
return get_area(file, orig_addr, len, pgoff, flags);
return orig_addr;
}

static unsigned long
proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
unsigned long rv = -EIO;

if (get_area)
rv = get_area(file, orig_addr, len, pgoff, flags);
else
rv = orig_addr;
if (pde_is_permanent(pde)) {
return pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags);
} else if (use_pde(pde)) {
rv = pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags);
unuse_pde(pde);
}
return rv;
Expand All @@ -401,6 +470,13 @@ static int proc_reg_open(struct inode *inode, struct file *file)
typeof_member(struct proc_ops, proc_release) release;
struct pde_opener *pdeo;

if (pde_is_permanent(pde)) {
open = pde->proc_ops->proc_open;
if (open)
rv = open(inode, file);
return rv;
}

/*
* Ensure that
* 1) PDE's ->release hook will be called no matter what
Expand Down Expand Up @@ -450,6 +526,17 @@ static int proc_reg_release(struct inode *inode, struct file *file)
{
struct proc_dir_entry *pde = PDE(inode);
struct pde_opener *pdeo;

if (pde_is_permanent(pde)) {
typeof_member(struct proc_ops, proc_release) release;

release = pde->proc_ops->proc_release;
if (release) {
return release(inode, file);
}
return 0;
}

spin_lock(&pde->pde_unload_lock);
list_for_each_entry(pdeo, &pde->pde_openers, lh) {
if (pdeo->file == file) {
Expand Down
6 changes: 6 additions & 0 deletions fs/proc/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ struct proc_dir_entry {
struct rb_node subdir_node;
char *name;
umode_t mode;
u8 flags;
u8 namelen;
char inline_name[];
} __randomize_layout;
Expand All @@ -73,6 +74,11 @@ struct proc_dir_entry {
0)
#define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE - sizeof(struct proc_dir_entry))

static inline bool pde_is_permanent(const struct proc_dir_entry *pde)
{
return pde->flags & PROC_ENTRY_PERMANENT;
}

extern struct kmem_cache *proc_dir_entry_cache;
void pde_free(struct proc_dir_entry *pde);

Expand Down
Loading

0 comments on commit d919b33

Please sign in to comment.