Skip to content

Commit

Permalink
Merge tag 'fsnotify_for_v5.11-rc1' of git://git.kernel.org/pub/scm/li…
Browse files Browse the repository at this point in the history
…nux/kernel/git/jack/linux-fs

Pull fsnotify updates from Jan Kara:
 "A few fsnotify fixes from Amir fixing fallout from big fsnotify
  overhaul a few months back and an improvement of defaults limiting
  maximum number of inotify watches from Waiman"

* tag 'fsnotify_for_v5.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  fsnotify: fix events reported to watching parent and child
  inotify: convert to handle_inode_event() interface
  fsnotify: generalize handle_inode_event()
  inotify: Increase default inotify.max_user_watches limit to 1048576
  • Loading branch information
torvalds committed Dec 17, 2020
2 parents d652d5f + fecc455 commit 14bd41e
Show file tree
Hide file tree
Showing 11 changed files with 120 additions and 104 deletions.
2 changes: 1 addition & 1 deletion fs/nfsd/filecache.c
Original file line number Diff line number Diff line change
Expand Up @@ -600,7 +600,7 @@ static struct notifier_block nfsd_file_lease_notifier = {
static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
struct inode *inode, struct inode *dir,
const struct qstr *name)
const struct qstr *name, u32 cookie)
{
trace_nfsd_file_fsnotify_handle_event(inode, mask);

Expand Down
2 changes: 1 addition & 1 deletion fs/notify/dnotify/dnotify.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
*/
static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
struct inode *inode, struct inode *dir,
const struct qstr *name)
const struct qstr *name, u32 cookie)
{
struct dnotify_mark *dn_mark;
struct dnotify_struct *dn;
Expand Down
7 changes: 3 additions & 4 deletions fs/notify/fanotify/fanotify.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,12 +268,11 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
continue;

/*
* If the event is for a child and this mark is on a parent not
* If the event is on a child and this mark is on a parent not
* watching children, don't send it!
*/
if (event_mask & FS_EVENT_ON_CHILD &&
type == FSNOTIFY_OBJ_TYPE_INODE &&
!(mark->mask & FS_EVENT_ON_CHILD))
if (type == FSNOTIFY_OBJ_TYPE_PARENT &&
!(mark->mask & FS_EVENT_ON_CHILD))
continue;

marks_mask |= mark->mask;
Expand Down
107 changes: 71 additions & 36 deletions fs/notify/fsnotify.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,13 @@ static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt,
if (mask & FS_ISDIR)
return false;

/*
* All events that are possible on child can also be reported with
* parent/name info to inode/sb/mount. Otherwise, a watching parent
* could result in events reported with unexpected name info to sb/mount.
*/
BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT);

/* Did either inode/sb/mount subscribe for events with parent/name? */
marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask);
marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask);
Expand Down Expand Up @@ -232,47 +239,76 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
}
EXPORT_SYMBOL_GPL(__fsnotify_parent);

/*
 * Report one event to a single inode mark through the group's
 * ->handle_inode_event() callback.
 *
 * Returns 0 without invoking the callback when:
 *  - the group's ops have no ->handle_inode_event() (warns once),
 *  - the mark has FS_EXCL_UNLINK set, the event data carries a path and
 *    d_unlinked() is true for that path's dentry, or
 *  - @mask and the mark's mask have no bit of ALL_FSNOTIFY_EVENTS in common.
 * Otherwise returns the callback's return value.
 */
static int fsnotify_handle_inode_event(struct fsnotify_group *group,
				       struct fsnotify_mark *inode_mark,
				       u32 mask, const void *data, int data_type,
				       struct inode *dir, const struct qstr *name,
				       u32 cookie)
{
	const struct path *event_path = fsnotify_data_path(data, data_type);
	struct inode *event_inode = fsnotify_data_inode(data, data_type);
	const struct fsnotify_ops *group_ops = group->ops;

	/* Callers only get here for groups expected to implement this op */
	if (WARN_ON_ONCE(!group_ops->handle_inode_event))
		return 0;

	/* A mark with FS_EXCL_UNLINK does not want events on unlinked dentries */
	if (inode_mark->mask & FS_EXCL_UNLINK) {
		if (event_path && d_unlinked(event_path->dentry))
			return 0;
	}

	/*
	 * The event may have been sent with two marks, so verify that this
	 * particular mark is interested in it.
	 */
	if ((mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS) == 0)
		return 0;

	return group_ops->handle_inode_event(inode_mark, mask, event_inode,
					     dir, name, cookie);
}

static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask,
const void *data, int data_type,
struct inode *dir, const struct qstr *name,
u32 cookie, struct fsnotify_iter_info *iter_info)
{
struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info);
struct inode *inode = fsnotify_data_inode(data, data_type);
const struct fsnotify_ops *ops = group->ops;
struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info);
int ret;

if (WARN_ON_ONCE(!ops->handle_inode_event))
return 0;

if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) ||
WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info)))
return 0;

/*
* An event can be sent on child mark iterator instead of inode mark
* iterator because of other groups that have interest in this inode
* and have marks on both parent and child. We can simplify this case.
*/
if (!inode_mark) {
inode_mark = child_mark;
child_mark = NULL;
if (parent_mark) {
/*
* parent_mark indicates that the parent inode is watching
* children and interested in this event, which is an event
* possible on child. But is *this mark* watching children and
* interested in this event?
*/
if (parent_mark->mask & FS_EVENT_ON_CHILD) {
ret = fsnotify_handle_inode_event(group, parent_mark, mask,
data, data_type, dir, name, 0);
if (ret)
return ret;
}
if (!inode_mark)
return 0;
}

if (mask & FS_EVENT_ON_CHILD) {
/*
* Some events can be sent on both parent dir and child marks
* (e.g. FS_ATTRIB). If both parent dir and child are
* watching, report the event once to parent dir with name (if
* interested) and once to child without name (if interested).
* The child watcher is expecting an event without a file name
* and without the FS_EVENT_ON_CHILD flag.
*/
mask &= ~FS_EVENT_ON_CHILD;
dir = NULL;
name = NULL;
}

ret = ops->handle_inode_event(inode_mark, mask, inode, dir, name);
if (ret || !child_mark)
return ret;

/*
* Some events can be sent on both parent dir and child marks
* (e.g. FS_ATTRIB). If both parent dir and child are watching,
* report the event once to parent dir with name and once to child
* without name.
*/
return ops->handle_inode_event(child_mark, mask, inode, NULL, NULL);
return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type,
dir, name, cookie);
}

static int send_to_group(__u32 mask, const void *data, int data_type,
Expand Down Expand Up @@ -430,7 +466,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
struct fsnotify_iter_info iter_info = {};
struct super_block *sb;
struct mount *mnt = NULL;
struct inode *child = NULL;
struct inode *parent = NULL;
int ret = 0;
__u32 test_mask, marks_mask;

Expand All @@ -442,11 +478,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
inode = dir;
} else if (mask & FS_EVENT_ON_CHILD) {
/*
* Event on child - report on TYPE_INODE to dir if it is
* watching children and on TYPE_CHILD to child.
* Event on child - report on TYPE_PARENT to dir if it is
* watching children and on TYPE_INODE to child.
*/
child = inode;
inode = dir;
parent = dir;
}
sb = inode->i_sb;

Expand All @@ -460,16 +495,16 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
if (!sb->s_fsnotify_marks &&
(!mnt || !mnt->mnt_fsnotify_marks) &&
(!inode || !inode->i_fsnotify_marks) &&
(!child || !child->i_fsnotify_marks))
(!parent || !parent->i_fsnotify_marks))
return 0;

marks_mask = sb->s_fsnotify_mask;
if (mnt)
marks_mask |= mnt->mnt_fsnotify_mask;
if (inode)
marks_mask |= inode->i_fsnotify_mask;
if (child)
marks_mask |= child->i_fsnotify_mask;
if (parent)
marks_mask |= parent->i_fsnotify_mask;


/*
Expand All @@ -492,9 +527,9 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
fsnotify_first_mark(&inode->i_fsnotify_marks);
}
if (child) {
iter_info.marks[FSNOTIFY_OBJ_TYPE_CHILD] =
fsnotify_first_mark(&child->i_fsnotify_marks);
if (parent) {
iter_info.marks[FSNOTIFY_OBJ_TYPE_PARENT] =
fsnotify_first_mark(&parent->i_fsnotify_marks);
}

/*
Expand Down
9 changes: 4 additions & 5 deletions fs/notify/inotify/inotify.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,10 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)

extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group);
extern int inotify_handle_event(struct fsnotify_group *group, u32 mask,
const void *data, int data_type,
struct inode *dir,
const struct qstr *file_name, u32 cookie,
struct fsnotify_iter_info *iter_info);
extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark,
u32 mask, struct inode *inode,
struct inode *dir,
const struct qstr *name, u32 cookie);

extern const struct fsnotify_ops inotify_fsnotify_ops;
extern struct kmem_cache *inotify_inode_mark_cachep;
Expand Down
51 changes: 8 additions & 43 deletions fs/notify/inotify/inotify_fsnotify.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,25 +55,21 @@ static int inotify_merge(struct list_head *list,
return event_compare(last_event, event);
}

static int inotify_one_event(struct fsnotify_group *group, u32 mask,
struct fsnotify_mark *inode_mark,
const struct path *path,
const struct qstr *file_name, u32 cookie)
int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
struct inode *inode, struct inode *dir,
const struct qstr *name, u32 cookie)
{
struct inotify_inode_mark *i_mark;
struct inotify_event_info *event;
struct fsnotify_event *fsn_event;
struct fsnotify_group *group = inode_mark->group;
int ret;
int len = 0;
int alloc_len = sizeof(struct inotify_event_info);
struct mem_cgroup *old_memcg;

if ((inode_mark->mask & FS_EXCL_UNLINK) &&
path && d_unlinked(path->dentry))
return 0;

if (file_name) {
len = file_name->len;
if (name) {
len = name->len;
alloc_len += len + 1;
}

Expand Down Expand Up @@ -117,7 +113,7 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
event->sync_cookie = cookie;
event->name_len = len;
if (len)
strcpy(event->name, file_name->name);
strcpy(event->name, name->name);

ret = fsnotify_add_event(group, fsn_event, inotify_merge);
if (ret) {
Expand All @@ -131,37 +127,6 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
return 0;
}

/*
 * Queue inotify events for the inode and/or child marks found in @iter_info.
 *
 * The inode mark, if any, gets an event carrying @file_name and @cookie.
 * The child mark, if any, then gets a second event with no name and a
 * zero cookie. A non-zero result from the first event is returned
 * immediately and the child mark is not notified.
 *
 * Returns 0 (after warning) if @iter_info carries a vfsmount mark.
 */
int inotify_handle_event(struct fsnotify_group *group, u32 mask,
			 const void *data, int data_type, struct inode *dir,
			 const struct qstr *file_name, u32 cookie,
			 struct fsnotify_iter_info *iter_info)
{
	const struct path *event_path = fsnotify_data_path(data, data_type);
	struct fsnotify_mark *named_mark = fsnotify_iter_inode_mark(iter_info);
	struct fsnotify_mark *unnamed_mark = fsnotify_iter_child_mark(iter_info);
	int err;

	if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info)))
		return 0;

	/*
	 * Events that cannot be sent on both parent and child marks
	 * (e.g. IN_CREATE) always arrive on the inode mark.
	 * Events possible on both (e.g. IN_OPEN) arrive on the inode mark
	 * with a name when the parent is watching, and on the child mark
	 * without a name when the child is watching.
	 * When both are watching, the named event is reported first and the
	 * unnamed one afterwards.
	 */
	if (named_mark) {
		err = inotify_one_event(group, mask, named_mark, event_path,
					file_name, cookie);
		if (err)
			return err;
	}

	if (!unnamed_mark)
		return 0;

	return inotify_one_event(group, mask, unnamed_mark, event_path,
				 NULL, 0);
}

static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group)
{
inotify_ignored_and_remove_idr(fsn_mark, group);
Expand Down Expand Up @@ -227,7 +192,7 @@ static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
}

const struct fsnotify_ops inotify_fsnotify_ops = {
.handle_event = inotify_handle_event,
.handle_inode_event = inotify_handle_inode_event,
.free_group_priv = inotify_free_group_priv,
.free_event = inotify_free_event,
.freeing_mark = inotify_freeing_mark,
Expand Down
31 changes: 24 additions & 7 deletions fs/notify/inotify/inotify_user.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,15 @@

#include <asm/ioctls.h>

/*
 * Estimated memory cost, in bytes, of a single inotify watch.
 *
 * An inotify watch requires allocating an inotify_inode_mark structure as
 * well as pinning the watched inode. The pinned inode is counted as twice
 * sizeof(struct inode): doubling the size of a VFS inode should be more
 * than enough to cover the additional filesystem inode size increase.
 */
#define INOTIFY_WATCH_COST (sizeof(struct inotify_inode_mark) + \
2 * sizeof(struct inode))

/* configurable via /proc/sys/fs/inotify/ */
static int inotify_max_queued_events __read_mostly;

Expand Down Expand Up @@ -486,14 +495,10 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group)
{
struct inotify_inode_mark *i_mark;
struct fsnotify_iter_info iter_info = { };

fsnotify_iter_set_report_type_mark(&iter_info, FSNOTIFY_OBJ_TYPE_INODE,
fsn_mark);

/* Queue ignore event for the watch */
inotify_handle_event(group, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE,
NULL, NULL, 0, &iter_info);
inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL,
0);

i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
/* remove this mark from the idr */
Expand Down Expand Up @@ -801,6 +806,18 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
*/
static int __init inotify_user_setup(void)
{
unsigned long watches_max;
struct sysinfo si;

si_meminfo(&si);
/*
* Allow up to 1% of addressable memory to be allocated for inotify
* watches (per user) limited to the range [8192, 1048576].
*/
watches_max = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) /
INOTIFY_WATCH_COST;
watches_max = clamp(watches_max, 8192UL, 1048576UL);

BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
Expand All @@ -827,7 +844,7 @@ static int __init inotify_user_setup(void)

inotify_max_queued_events = 16384;
init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max;

return 0;
}
Expand Down
Loading

0 comments on commit 14bd41e

Please sign in to comment.