Skip to content

Commit

Permalink
Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/…
Browse files Browse the repository at this point in the history
…kernel/git/mszeredi/vfs

Pull overlayfs update from Miklos Szeredi:
 "The biggest part of this is making st_dev/st_ino on the overlay behave
  like a normal filesystem (i.e. st_ino doesn't change on copy up,
  st_dev is the same for all files and directories). Currently this only
  works if all layers are on the same filesystem, but future work will
  move the general case towards more sane behavior.

  There are also miscellaneous fixes, including fixes to handling
  append-only files. There's a small change in the VFS, but that only
  has an effect on overlayfs, since otherwise file->f_path.dentry->inode
  and file_inode(file) are always the same"

* 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs:
  ovl: update documentation w.r.t. constant inode numbers
  ovl: persistent inode numbers for upper hardlinks
  ovl: merge getattr for dir and nondir
  ovl: constant st_ino/st_dev across copy up
  ovl: persistent inode number for directories
  ovl: set the ORIGIN type flag
  ovl: lookup non-dir copy-up-origin by file handle
  ovl: use an auxiliary var for overlay root entry
  ovl: store file handle of lower inode on copy up
  ovl: check if all layers are on the same fs
  ovl: do not set overlay.opaque on non-dir create
  ovl: check IS_APPEND() on real upper inode
  vfs: ftruncate check IS_APPEND() on real upper inode
  ovl: Use designated initializers
  ovl: lockdep annotate of nested stacked overlayfs inode lock
  • Loading branch information
torvalds committed May 10, 2017
2 parents a2e5ad4 + 65f2673 commit b948abf
Show file tree
Hide file tree
Showing 10 changed files with 425 additions and 52 deletions.
9 changes: 8 additions & 1 deletion Documentation/filesystems/overlayfs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,19 @@ from accessing the corresponding object from the original filesystem.
This is most obvious from the 'st_dev' field returned by stat(2).

While directories will report an st_dev from the overlay-filesystem,
all non-directory objects will report an st_dev from the lower or
non-directory objects may report an st_dev from the lower filesystem or
upper filesystem that is providing the object. Similarly st_ino will
only be unique when combined with st_dev, and both of these can change
over the lifetime of a non-directory object. Many applications and
tools ignore these values and will not be affected.

In the special case of all overlay layers on the same underlying
filesystem, all objects will report an st_dev from the overlay
filesystem and st_ino from the underlying filesystem. This will
make the overlay mount more compliant with filesystem scanners and
overlay objects will be distinguishable from the corresponding
objects in the original filesystem.

Upper and Lower
---------------

Expand Down
3 changes: 2 additions & 1 deletion fs/open.c
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
goto out_putf;

error = -EPERM;
if (IS_APPEND(inode))
/* Check IS_APPEND on real upper inode */
if (IS_APPEND(file_inode(f.file)))
goto out_putf;

sb_start_write(inode->i_sb);
Expand Down
82 changes: 82 additions & 0 deletions fs/overlayfs/copy_up.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <linux/namei.h>
#include <linux/fdtable.h>
#include <linux/ratelimit.h>
#include <linux/exportfs.h>
#include "overlayfs.h"
#include "ovl_entry.h"

Expand Down Expand Up @@ -232,6 +233,79 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
return err;
}

/*
 * Build an overlay file handle (struct ovl_fh) that identifies @lower.
 *
 * @lower: dentry on the lower layer to encode
 * @uuid:  uuid copied verbatim into the resulting handle
 *
 * The raw file id is first produced by exportfs_encode_fh() into a scratch
 * buffer of MAX_HANDLE_SZ bytes and then copied behind the ovl_fh header.
 *
 * Returns a kmalloc()ed handle on success, ERR_PTR(-ENOMEM) if either
 * allocation fails, or ERR_PTR(-EIO) if the encoded handle is unusable.
 */
static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_be *uuid)
{
	struct ovl_fh *handle = ERR_PTR(-EIO);
	int type, total_len, words;
	int fid_bytes = MAX_HANDLE_SZ;
	void *scratch;

	scratch = kmalloc(fid_bytes, GFP_TEMPORARY);
	if (!scratch)
		return ERR_PTR(-ENOMEM);

	/*
	 * We encode a non-connectable file handle for non-dir, because we
	 * only need to find the lower inode number and we don't want to pay
	 * the price of reconnecting the dentry.
	 */
	words = fid_bytes >> 2;
	type = exportfs_encode_fh(lower, scratch, &words, 0);
	/* exportfs reports the length in 32-bit words; convert back to bytes */
	fid_bytes = words << 2;

	/* Any of these conditions leaves the -EIO error pointer in place */
	if (WARN_ON(type < 0))
		goto free_scratch;
	if (WARN_ON(fid_bytes > MAX_HANDLE_SZ))
		goto free_scratch;
	if (WARN_ON(type == FILEID_INVALID))
		goto free_scratch;

	/* Compile-time guarantee that the largest possible total stays <= 255 */
	BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
	total_len = offsetof(struct ovl_fh, fid) + fid_bytes;

	handle = kmalloc(total_len, GFP_KERNEL);
	if (!handle) {
		handle = ERR_PTR(-ENOMEM);
		goto free_scratch;
	}

	/* Header fields first, then the raw file id right after the header */
	handle->magic = OVL_FH_MAGIC;
	handle->version = OVL_FH_VERSION;
	handle->flags = OVL_FH_FLAG_CPU_ENDIAN;
	handle->type = type;
	handle->len = total_len;
	handle->uuid = *uuid;
	memcpy(handle->fid, scratch, fid_bytes);

free_scratch:
	kfree(scratch);
	return handle;
}

static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
struct dentry *upper)
{
struct super_block *sb = lower->d_sb;
uuid_be *uuid = (uuid_be *) &sb->s_uuid;
const struct ovl_fh *fh = NULL;
int err;

/*
* When lower layer doesn't support export operations store a 'null' fh,
* so we can use the overlay.origin xattr to distignuish between a copy
* up and a pure upper inode.
*/
if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
uuid_be_cmp(*uuid, NULL_UUID_BE)) {
fh = ovl_encode_fh(lower, uuid);
if (IS_ERR(fh))
return PTR_ERR(fh);
}

err = ovl_do_setxattr(upper, OVL_XATTR_ORIGIN, fh, fh ? fh->len : 0, 0);
kfree(fh);

return err;
}

static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
struct dentry *dentry, struct path *lowerpath,
struct kstat *stat, const char *link,
Expand Down Expand Up @@ -316,6 +390,14 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
if (err)
goto out_cleanup;

/*
* Store identifier of lower inode in upper inode xattr to
* allow lookup of the copy up origin inode.
*/
err = ovl_set_origin(dentry, lowerpath->dentry, temp);
if (err)
goto out_cleanup;

if (tmpfile)
err = ovl_do_link(temp, udir, upper, true);
else
Expand Down
37 changes: 5 additions & 32 deletions fs/overlayfs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,36 +138,6 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
return err;
}

/*
 * getattr for overlay directories: stat the real directory with the
 * overlay's overridden credentials, then report the overlay's own
 * st_dev/st_ino instead of the real ones.
 *
 * Returns 0 on success or the error from vfs_getattr().
 */
static int ovl_dir_getattr(const struct path *path, struct kstat *stat,
			   u32 request_mask, unsigned int flags)
{
	struct dentry *dentry = path->dentry;
	struct path real;
	enum ovl_path_type type = ovl_path_real(dentry, &real);
	const struct cred *saved_cred = ovl_override_creds(dentry->d_sb);
	int ret = vfs_getattr(&real, stat, request_mask, flags);

	revert_creds(saved_cred);

	if (!ret) {
		stat->ino = dentry->d_inode->i_ino;
		stat->dev = dentry->d_sb->s_dev;

		/*
		 * It's probably not worth it to count subdirs to get the
		 * correct link count. nlink=1 seems to pacify 'find' and
		 * other utilities.
		 */
		if (OVL_TYPE_MERGE(type))
			stat->nlink = 1;
	}

	return ret;
}

/* Common operations required to be done after creation of file on upper */
static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry, bool hardlink)
Expand All @@ -182,6 +152,9 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
inc_nlink(inode);
}
d_instantiate(dentry, inode);
/* Force lookup of new upper hardlink to find its lower */
if (hardlink)
d_drop(dentry);
}

static bool ovl_type_merge(struct dentry *dentry)
Expand Down Expand Up @@ -210,7 +183,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
if (err)
goto out_dput;

if (ovl_type_merge(dentry->d_parent)) {
if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
/* Setting opaque here is just an optimization, allow to fail */
ovl_set_opaque(dentry, newdentry);
}
Expand Down Expand Up @@ -1070,7 +1043,7 @@ const struct inode_operations ovl_dir_inode_operations = {
.create = ovl_create,
.mknod = ovl_mknod,
.permission = ovl_permission,
.getattr = ovl_dir_getattr,
.getattr = ovl_getattr,
.listxattr = ovl_listxattr,
.get_acl = ovl_get_acl,
.update_time = ovl_update_time,
Expand Down
103 changes: 100 additions & 3 deletions fs/overlayfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,78 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
return err;
}

/*
 * getattr shared by overlay directories and non-directories.
 *
 * Fills @stat from the real path via vfs_getattr() while running with the
 * overlay's overridden credentials, then adjusts st_dev, st_ino and (for
 * merged directories) nlink as described in the comments below.
 *
 * Returns 0 on success or the error from vfs_getattr().
 *
 * NOTE(review): this span is a rendered diff without +/- markers. The
 * "static int" signature pair and the bare "ovl_path_real(dentry, &realpath);"
 * call appear to be the removed lines, each superseded by the line(s)
 * directly after it - confirm against the real tree.
 */
static int ovl_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
int ovl_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
enum ovl_path_type type;
struct path realpath;
const struct cred *old_cred;
bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
int err;

ovl_path_real(dentry, &realpath);
type = ovl_path_real(dentry, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
err = vfs_getattr(&realpath, stat, request_mask, flags);
if (err)
goto out;

/*
* When all layers are on the same fs, all real inode numbers are
* unique, so we use the overlay st_dev, which is friendly to du -x.
*
* We also use st_ino of the copy up origin, if we know it.
* This guarantees constant st_dev/st_ino across copy up.
*
* If filesystem supports NFS export ops, this also guarantees
* persistent st_ino across mount cycle.
*/
if (ovl_same_sb(dentry->d_sb)) {
if (OVL_TYPE_ORIGIN(type)) {
struct kstat lowerstat;
/* Only the inode number is needed, plus nlink for non-directories */
u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);

/* realpath is reused here to hold the lower path */
ovl_path_lower(dentry, &realpath);
err = vfs_getattr(&realpath, &lowerstat,
lowermask, flags);
if (err)
goto out;

WARN_ON_ONCE(stat->dev != lowerstat.dev);
/*
* Lower hardlinks are broken on copy up to different
* upper files, so we cannot use the lower origin st_ino
* for those different files, even for the same fs case.
*/
if (is_dir || lowerstat.nlink == 1)
stat->ino = lowerstat.ino;
}
stat->dev = dentry->d_sb->s_dev;
} else if (is_dir) {
/*
* If not all layers are on the same fs the pair {real st_ino;
* overlay st_dev} is not unique, so use the non persistent
* overlay st_ino.
*
* Always use the overlay st_dev for directories, so 'find
* -xdev' will scan the entire overlay mount and won't cross the
* overlay mount boundaries.
*/
stat->dev = dentry->d_sb->s_dev;
stat->ino = dentry->d_inode->i_ino;
}

/*
* It's probably not worth it to count subdirs to get the
* correct link count. nlink=1 seems to pacify 'find' and
* other utilities.
*/
if (is_dir && OVL_TYPE_MERGE(type))
stat->nlink = 1;

out:
/* Both the success and the error paths restore the caller's creds */
revert_creds(old_cred);

return err;
}

Expand Down Expand Up @@ -303,6 +363,41 @@ static const struct inode_operations ovl_symlink_inode_operations = {
.update_time = ovl_update_time,
};

/*
* It is possible to stack overlayfs instance on top of another
* overlayfs instance as lower layer. We need to annotate the
* stackable i_mutex locks according to stack level of the super
* block instance. An overlayfs instance can never be in stack
* depth 0 (there is always a real fs below it). An overlayfs
* inode lock will use the lockdep annotation ovl_i_mutex_key[depth].
*
* For example, here is a snip from /proc/lockdep_chains after
* dir_iterate of nested overlayfs:
*
* [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2)
* [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
* [...] &type->i_mutex_dir_key (stack_depth=0)
*/
/* Number of distinct nesting levels; sizes the per-depth key arrays below */
#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH

/*
 * Give @inode's i_rwsem a lockdep class selected by the stack depth of the
 * inode's super block and by whether the inode is a directory.
 *
 * The body is compiled only when CONFIG_LOCKDEP is set; otherwise this
 * function does nothing.
 */
static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
{
#ifdef CONFIG_LOCKDEP
/* Separate key arrays for non-directories and directories, one key per depth */
static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];

/* s_stack_depth is shifted down by one to form a zero-based array index */
int depth = inode->i_sb->s_stack_depth - 1;

/* An out-of-range depth warns once and falls back to index 0 */
if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING))
depth = 0;

/*
 * NOTE(review): the class names shown in the comment above this function
 * ("&ovl_i_mutex_dir_key[depth]") match the key expression text, so
 * lockdep_set_class() presumably derives the name from its key argument as
 * written - keep these expressions verbatim; confirm before refactoring.
 */
if (S_ISDIR(inode->i_mode))
lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
else
lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);
#endif
}

static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
inode->i_ino = get_next_ino();
Expand All @@ -312,6 +407,8 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
#endif

ovl_lockdep_annotate_inode_mutex_key(inode);

switch (mode & S_IFMT) {
case S_IFREG:
inode->i_op = &ovl_file_inode_operations;
Expand Down
Loading

0 comments on commit b948abf

Please sign in to comment.