Skip to content

Commit

Permalink
fsnotify: do not share events between notification groups
Browse files Browse the repository at this point in the history
Currently fsnotify framework creates one event structure for each
notification event and links this event into all interested notification
groups.  This is done so that we save memory when several notification
groups are interested in the event.  However the need for event
structure shared between inotify & fanotify bloats the event structure
so the result is often higher memory consumption.

Another problem is that fsnotify framework keeps path references with
outstanding events so that fanotify can return open file descriptors
with its events.  This has the undesirable effect that a filesystem cannot
be unmounted while there are outstanding events - a regression for
inotify compared to the situation before it was converted to the fsnotify
framework.  For fanotify this problem is hard to avoid and users of
fanotify should kind of expect this behavior when they ask for file
descriptors from notified files.

This patch changes fsnotify and its users to create separate event
structure for each group.  This allows for much simpler code (~400 lines
removed by this patch) and also smaller event structures.  For example,
on a 64-bit system the original struct fsnotify_event consumes 120 bytes, plus
additional space for file name, additional 24 bytes for second and each
subsequent group linking the event, and additional 32 bytes for each
inotify group for private data.  After the conversion inotify event
consumes 48 bytes plus space for file name which is considerably less
memory unless file names are long and there are several groups
interested in the events (both of which are uncommon).  Fanotify event
fits in 56 bytes after the conversion (fanotify doesn't care about file
names so its events don't have to have it allocated).  A win unless
there are four or more fanotify groups interested in the event.

The conversion also solves the problem with unmount when only inotify is
used as we don't have to grab path references for inotify events.

[[email protected]: fanotify: fix corruption preventing startup]
Signed-off-by: Jan Kara <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Cc: Eric Paris <[email protected]>
Cc: Al Viro <[email protected]>
Signed-off-by: Hugh Dickins <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
jankara authored and torvalds committed Jan 22, 2014
1 parent e9fe690 commit 7053aee
Show file tree
Hide file tree
Showing 13 changed files with 318 additions and 708 deletions.
11 changes: 5 additions & 6 deletions fs/notify/dnotify/dnotify.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,21 +82,20 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
* events.
*/
static int dnotify_handle_event(struct fsnotify_group *group,
struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
struct fsnotify_event *event)
u32 mask, void *data, int data_type,
const unsigned char *file_name)
{
struct dnotify_mark *dn_mark;
struct inode *to_tell;
struct dnotify_struct *dn;
struct dnotify_struct **prev;
struct fown_struct *fown;
__u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD;
__u32 test_mask = mask & ~FS_EVENT_ON_CHILD;

BUG_ON(vfsmount_mark);

to_tell = event->to_tell;

dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);

spin_lock(&inode_mark->lock);
Expand Down Expand Up @@ -155,7 +154,7 @@ static struct fsnotify_ops dnotify_fsnotify_ops = {
.should_send_event = dnotify_should_send_event,
.free_group_priv = NULL,
.freeing_mark = NULL,
.free_event_priv = NULL,
.free_event = NULL,
};

/*
Expand Down
211 changes: 104 additions & 107 deletions fs/notify/fanotify/fanotify.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,91 +9,56 @@
#include <linux/types.h>
#include <linux/wait.h>

static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
#include "fanotify.h"

static bool should_merge(struct fsnotify_event *old_fsn,
struct fsnotify_event *new_fsn)
{
pr_debug("%s: old=%p new=%p\n", __func__, old, new);
struct fanotify_event_info *old, *new;

if (old->to_tell == new->to_tell &&
old->data_type == new->data_type &&
old->tgid == new->tgid) {
switch (old->data_type) {
case (FSNOTIFY_EVENT_PATH):
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
/* dont merge two permission events */
if ((old->mask & FAN_ALL_PERM_EVENTS) &&
(new->mask & FAN_ALL_PERM_EVENTS))
return false;
/* dont merge two permission events */
if ((old_fsn->mask & FAN_ALL_PERM_EVENTS) &&
(new_fsn->mask & FAN_ALL_PERM_EVENTS))
return false;
#endif
if ((old->path.mnt == new->path.mnt) &&
(old->path.dentry == new->path.dentry))
return true;
break;
case (FSNOTIFY_EVENT_NONE):
return true;
default:
BUG();
};
}
pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn);
old = FANOTIFY_E(old_fsn);
new = FANOTIFY_E(new_fsn);

if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid &&
old->path.mnt == new->path.mnt &&
old->path.dentry == new->path.dentry)
return true;
return false;
}

/* and the list better be locked by something too! */
static struct fsnotify_event *fanotify_merge(struct list_head *list,
struct fsnotify_event *event)
{
struct fsnotify_event_holder *test_holder;
struct fsnotify_event *test_event = NULL;
struct fsnotify_event *new_event;
struct fsnotify_event *test_event;
bool do_merge = false;

pr_debug("%s: list=%p event=%p\n", __func__, list, event);


list_for_each_entry_reverse(test_holder, list, event_list) {
if (should_merge(test_holder->event, event)) {
test_event = test_holder->event;
list_for_each_entry_reverse(test_event, list, list) {
if (should_merge(test_event, event)) {
do_merge = true;
break;
}
}

if (!test_event)
if (!do_merge)
return NULL;

fsnotify_get_event(test_event);

/* if they are exactly the same we are done */
if (test_event->mask == event->mask)
return test_event;

/*
* if the refcnt == 2 this is the only queue
* for this event and so we can update the mask
* in place.
*/
if (atomic_read(&test_event->refcnt) == 2) {
test_event->mask |= event->mask;
return test_event;
}

new_event = fsnotify_clone_event(test_event);

/* done with test_event */
fsnotify_put_event(test_event);

/* couldn't allocate memory, merge was not possible */
if (unlikely(!new_event))
return ERR_PTR(-ENOMEM);

/* build new event and replace it on the list */
new_event->mask = (test_event->mask | event->mask);
fsnotify_replace_event(test_holder, new_event);

/* we hold a reference on new_event from clone_event */
return new_event;
test_event->mask |= event->mask;
return test_event;
}

#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
static int fanotify_get_response_from_access(struct fsnotify_group *group,
struct fsnotify_event *event)
struct fanotify_event_info *event)
{
int ret;

Expand All @@ -106,7 +71,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
return 0;

/* userspace responded, convert to something usable */
spin_lock(&event->lock);
switch (event->response) {
case FAN_ALLOW:
ret = 0;
Expand All @@ -116,7 +80,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
ret = -EPERM;
}
event->response = 0;
spin_unlock(&event->lock);

pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
group, event, ret);
Expand All @@ -125,57 +88,17 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
}
#endif

/*
 * fanotify_handle_event() - hand an already-built fsnotify event to @group.
 *
 * This definition receives a struct fsnotify_event created by the caller.
 * NOTE(review): a second fanotify_handle_event() with a different parameter
 * list appears further down in this text; going by the commit description
 * this one looks like the variant being replaced - confirm before relying
 * on it.
 *
 * @group:         notification group the event is added to
 * @inode_mark:    not referenced in this body
 * @fanotify_mark: not referenced in this body
 * @event:         event passed to fsnotify_add_notify_event()
 *
 * Returns PTR_ERR() of the value from fsnotify_add_notify_event() when that
 * value is an error pointer.  Otherwise returns 0, or, when
 * CONFIG_FANOTIFY_ACCESS_PERMISSIONS is set and @event->mask has a
 * FAN_ALL_PERM_EVENTS bit, the result of fanotify_get_response_from_access().
 */
static int fanotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *fanotify_mark,
struct fsnotify_event *event)
{
int ret = 0;
struct fsnotify_event *notify_event = NULL;

/* Build-time checks that each FAN_* constant equals its FS_* counterpart. */
BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);

pr_debug("%s: group=%p event=%p\n", __func__, group, event);

/*
 * fanotify_merge is supplied as the merge callback.  The result is
 * either NULL, an error pointer, or another event (the code below
 * treats a non-NULL, non-error result as the event @event was merged
 * into).
 */
notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
if (IS_ERR(notify_event))
return PTR_ERR(notify_event);

#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (event->mask & FAN_ALL_PERM_EVENTS) {
/* if we merged we need to wait on the new event */
if (notify_event)
event = notify_event;
ret = fanotify_get_response_from_access(group, event);
}
#endif

/* presumably drops the reference that came with the returned event - TODO confirm */
if (notify_event)
fsnotify_put_event(notify_event);

return ret;
}

static bool fanotify_should_send_event(struct fsnotify_group *group,
struct inode *to_tell,
struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmnt_mark,
__u32 event_mask, void *data, int data_type)
{
__u32 marks_mask, marks_ignored_mask;
struct path *path = data;

pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
"mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
pr_debug("%s: group=%p inode=%p inode_mark=%p vfsmnt_mark=%p "
"mask=%x data=%p data_type=%d\n", __func__, group, inode,
inode_mark, vfsmnt_mark, event_mask, data, data_type);

/* if we don't have enough info to send an event to userspace say no */
Expand Down Expand Up @@ -217,6 +140,70 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
return false;
}

/*
 * fanotify_handle_event() - create a fanotify event for @group and add it
 * to the group's notification queue.
 *
 * A struct fanotify_event_info is allocated from fanotify_event_cachep, its
 * embedded fsnotify event is initialised with @inode and @mask, and the
 * current task's tgid is recorded.  When @data_type is FSNOTIFY_EVENT_PATH,
 * @data is read as a struct path, copied into the event and path_get() is
 * called on the copy; otherwise the event's path is cleared.
 *
 * @inode_mark, @fanotify_mark and @file_name are not referenced here.
 *
 * Returns -ENOMEM when the allocation fails, the PTR_ERR() of an error
 * pointer returned by fsnotify_add_notify_event(), and otherwise 0 - or,
 * with CONFIG_FANOTIFY_ACCESS_PERMISSIONS and a FAN_ALL_PERM_EVENTS bit set
 * in the resulting event's mask, whatever
 * fanotify_get_response_from_access() returns.
 */
static int fanotify_handle_event(struct fsnotify_group *group,
				 struct inode *inode,
				 struct fsnotify_mark *inode_mark,
				 struct fsnotify_mark *fanotify_mark,
				 u32 mask, void *data, int data_type,
				 const unsigned char *file_name)
{
	struct fanotify_event_info *fan_ev;
	struct fsnotify_event *queued;
	struct fsnotify_event *merged;
	int err = 0;

	/* The FAN_* values must stay identical to their FS_* counterparts. */
	BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
	BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
	BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
	BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
	BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
	BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
	BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
	BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
	BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
	BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);

	pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
		 mask);

	fan_ev = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL);
	if (unlikely(fan_ev == NULL))
		return -ENOMEM;

	queued = &fan_ev->fse;
	fsnotify_init_event(queued, inode, mask);
	fan_ev->tgid = get_pid(task_tgid(current));

	if (data_type != FSNOTIFY_EVENT_PATH) {
		/* No path came with this event. */
		fan_ev->path.mnt = NULL;
		fan_ev->path.dentry = NULL;
	} else {
		/* Keep our own copy of the path for the event's lifetime. */
		fan_ev->path = *(struct path *)data;
		path_get(&fan_ev->path);
	}
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
	fan_ev->response = 0;
#endif

	merged = fsnotify_add_notify_event(group, queued, fanotify_merge);
	if (IS_ERR(merged)) {
		/* The event never made it onto the queue; release it. */
		fsnotify_destroy_event(group, queued);
		return PTR_ERR(merged);
	}
	if (merged != NULL) {
		/*
		 * Our event was folded into one already queued, so ours is
		 * no longer needed.  From here on, work with the event it
		 * was merged into.
		 */
		fsnotify_destroy_event(group, queued);
		fan_ev = FANOTIFY_E(merged);
		queued = merged;
	}

#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
	if (queued->mask & FAN_ALL_PERM_EVENTS)
		err = fanotify_get_response_from_access(group, fan_ev);
#endif
	return err;
}

static void fanotify_free_group_priv(struct fsnotify_group *group)
{
struct user_struct *user;
Expand All @@ -226,10 +213,20 @@ static void fanotify_free_group_priv(struct fsnotify_group *group)
free_uid(user);
}

/*
 * fanotify_free_event() - release a fanotify event.
 *
 * @fsn_event is the fsnotify event embedded in a struct fanotify_event_info.
 * The path and pid held by the containing structure are put, then the
 * structure itself is returned to fanotify_event_cachep.
 */
static void fanotify_free_event(struct fsnotify_event *fsn_event)
{
	struct fanotify_event_info *fan_ev = FANOTIFY_E(fsn_event);

	path_put(&fan_ev->path);
	put_pid(fan_ev->tgid);
	kmem_cache_free(fanotify_event_cachep, fan_ev);
}

const struct fsnotify_ops fanotify_fsnotify_ops = {
.handle_event = fanotify_handle_event,
.should_send_event = fanotify_should_send_event,
.free_group_priv = fanotify_free_group_priv,
.free_event_priv = NULL,
.free_event = fanotify_free_event,
.freeing_mark = NULL,
};
23 changes: 23 additions & 0 deletions fs/notify/fanotify/fanotify.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#include <linux/fsnotify_backend.h>
#include <linux/path.h>
#include <linux/slab.h>

extern struct kmem_cache *fanotify_event_cachep;

/*
 * fanotify's own event record: a generic fsnotify event plus the extra
 * data fanotify keeps for it.
 */
struct fanotify_event_info {
/*
 * Embedded generic event.  FANOTIFY_E() converts a pointer to this
 * member back into a pointer to the containing structure.
 */
struct fsnotify_event fse;
/*
 * We hold ref to this path so it may be dereferenced at any point
 * during this object's lifetime
 */
struct path path;
/* pid pointer stored for the event - presumably the tgid of the task that triggered it; verify against the code that fills it in */
struct pid *tgid;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
u32 response; /* userspace answer to question */
#endif
};

/*
 * FANOTIFY_E() - given a pointer to the fsnotify_event embedded as the
 * 'fse' member of a struct fanotify_event_info, return a pointer to that
 * containing structure.
 */
static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
{
	struct fanotify_event_info *info;

	info = container_of(fse, struct fanotify_event_info, fse);
	return info;
}
Loading

0 comments on commit 7053aee

Please sign in to comment.