Skip to content

Commit

Permalink
Merge tag 'ext4-for-linus-5.8-rc1-2' of git://git.kernel.org/pub/scm/…
Browse files Browse the repository at this point in the history
…linux/kernel/git/tytso/ext4

Pull more ext4 updates from Ted Ts'o:
 "This is the second round of ext4 commits for 5.8 merge window [1].

  It includes the per-inode DAX support, which was dependent on the DAX
  infrastructure which came in via the XFS tree, and a number of
  regression and bug fixes; most notably the "BUG: using
  smp_processor_id() in preemptible code in ext4_mb_new_blocks" reported
  by syzkaller"

[1] The pull request actually came in 15 minutes after I had tagged the
    rc1 release. Tssk, tssk, late..   - Linus

* tag 'ext4-for-linus-5.8-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4, jbd2: ensure panic by fix a race between jbd2 abort and ext4 error handlers
  ext4: support xattr gnu.* namespace for the Hurd
  ext4: mballoc: Use this_cpu_read instead of this_cpu_ptr
  ext4: avoid utf8_strncasecmp() with unstable name
  ext4: stop overwrite the errcode in ext4_setup_super
  ext4: fix partial cluster initialization when splitting extent
  ext4: avoid race conditions when remounting with options that change dax
  Documentation/dax: Update DAX enablement for ext4
  fs/ext4: Introduce DAX inode flag
  fs/ext4: Remove jflag variable
  fs/ext4: Make DAX mount option a tri-state
  fs/ext4: Only change S_DAX on inode load
  fs/ext4: Update ext4_should_use_dax()
  fs/ext4: Change EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS
  fs/ext4: Disallow verity if inode is DAX
  fs/ext4: Narrow scope of DAX check in setflags
  • Loading branch information
torvalds committed Jun 15, 2020
2 parents b3a9e3b + 7b97d86 commit 3be20b6
Show file tree
Hide file tree
Showing 19 changed files with 290 additions and 73 deletions.
6 changes: 3 additions & 3 deletions Documentation/filesystems/dax.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,16 @@ size when creating the filesystem.
Currently 3 filesystems support DAX: ext2, ext4 and xfs. Enabling DAX on them
is different.

Enabling DAX on ext4 and ext2
Enabling DAX on ext2
-----------------------------

When mounting the filesystem, use the "-o dax" option on the command line or
add 'dax' to the options in /etc/fstab. This works to enable DAX on all files
within the filesystem. It is equivalent to the '-o dax=always' behavior below.


Enabling DAX on xfs
-------------------
Enabling DAX on xfs and ext4
----------------------------

Summary
-------
Expand Down
3 changes: 3 additions & 0 deletions Documentation/filesystems/ext4/verity.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,6 @@ is encrypted as well as the data itself.

Verity files cannot have blocks allocated past the end of the verity
metadata.

Verity and DAX are not compatible and attempts to set both of these flags
on a file will fail.
3 changes: 2 additions & 1 deletion fs/ext4/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
extents_status.o file.o fsmap.o fsync.o hash.o ialloc.o \
indirect.o inline.o inode.o ioctl.o mballoc.o migrate.o \
mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \
super.o symlink.o sysfs.o xattr.o xattr_trusted.o xattr_user.o
super.o symlink.o sysfs.o xattr.o xattr_hurd.o xattr_trusted.o \
xattr_user.o

ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
Expand Down
16 changes: 16 additions & 0 deletions fs/ext4/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
struct qstr qstr = {.name = str, .len = len };
const struct dentry *parent = READ_ONCE(dentry->d_parent);
const struct inode *inode = READ_ONCE(parent->d_inode);
char strbuf[DNAME_INLINE_LEN];

if (!inode || !IS_CASEFOLDED(inode) ||
!EXT4_SB(inode->i_sb)->s_encoding) {
Expand All @@ -683,6 +684,21 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
return memcmp(str, name->name, len);
}

/*
* If the dentry name is stored in-line, then it may be concurrently
* modified by a rename. If this happens, the VFS will eventually retry
* the lookup, so it doesn't matter what ->d_compare() returns.
* However, it's unsafe to call utf8_strncasecmp() with an unstable
* string. Therefore, we have to copy the name into a temporary buffer.
*/
if (len <= DNAME_INLINE_LEN - 1) {
memcpy(strbuf, str, len);
strbuf[len] = 0;
qstr.name = strbuf;
/* prevent compiler from optimizing out the temporary buffer */
barrier();
}

return ext4_ci_compare(inode, name, &qstr, false);
}

Expand Down
27 changes: 20 additions & 7 deletions fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -426,28 +426,33 @@ struct flex_groups {
#define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */
#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
/* 0x00400000 was formerly EXT4_EOFBLOCKS_FL */

#define EXT4_DAX_FL 0x02000000 /* Inode is DAX */

#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded directory */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */

#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */
#define EXT4_FL_USER_VISIBLE 0x725BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x624BC0FF /* User modifiable flags */

/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
EXT4_IMMUTABLE_FL | \
EXT4_APPEND_FL | \
EXT4_NODUMP_FL | \
EXT4_NOATIME_FL | \
EXT4_PROJINHERIT_FL)
EXT4_PROJINHERIT_FL | \
EXT4_DAX_FL)

/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\
EXT4_DAX_FL)

/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\
Expand All @@ -459,6 +464,10 @@ struct flex_groups {
/* The only flags that should be swapped */
#define EXT4_FL_SHOULD_SWAP (EXT4_HUGE_FILE_FL | EXT4_EXTENTS_FL)

/* Flags which are mutually exclusive to DAX */
#define EXT4_DAX_MUT_EXCL (EXT4_VERITY_FL | EXT4_ENCRYPT_FL |\
EXT4_JOURNAL_DATA_FL)

/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
{
Expand Down Expand Up @@ -499,6 +508,7 @@ enum {
EXT4_INODE_VERITY = 20, /* Verity protected inode */
EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
/* 22 was formerly EXT4_INODE_EOFBLOCKS */
EXT4_INODE_DAX = 25, /* Inode is DAX */
EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
EXT4_INODE_PROJINHERIT = 29, /* Create with parents projid */
EXT4_INODE_CASEFOLD = 30, /* Casefolded directory */
Expand Down Expand Up @@ -1135,9 +1145,9 @@ struct ext4_inode_info {
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
#ifdef CONFIG_FS_DAX
#define EXT4_MOUNT_DAX 0x00200 /* Direct Access */
#define EXT4_MOUNT_DAX_ALWAYS 0x00200 /* Direct Access */
#else
#define EXT4_MOUNT_DAX 0
#define EXT4_MOUNT_DAX_ALWAYS 0
#endif
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
Expand Down Expand Up @@ -1180,6 +1190,8 @@ struct ext4_inode_info {
blocks */
#define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
file systems */
#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */
#define EXT4_MOUNT2_DAX_INODE 0x00000010 /* For printing options only */

#define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly
specified journal checksum */
Expand Down Expand Up @@ -1992,6 +2004,7 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
*/
#define EXT4_FLAGS_RESIZING 0
#define EXT4_FLAGS_SHUTDOWN 1
#define EXT4_FLAGS_BDEV_IS_DAX 2

static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
{
Expand Down Expand Up @@ -2705,7 +2718,7 @@ extern int ext4_can_truncate(struct inode *inode);
extern int ext4_truncate(struct inode *);
extern int ext4_break_layouts(struct inode *);
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
extern void ext4_set_inode_flags(struct inode *);
extern void ext4_set_inode_flags(struct inode *, bool init);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
Expand Down
2 changes: 1 addition & 1 deletion fs/ext4/extents.c
Original file line number Diff line number Diff line change
Expand Up @@ -2844,7 +2844,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
* in use to avoid freeing it when removing blocks.
*/
if (sbi->s_cluster_ratio > 1) {
pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
partial.pclu = EXT4_B2C(sbi, pblk);
partial.state = nofree;
}
Expand Down
2 changes: 1 addition & 1 deletion fs/ext4/ialloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1116,7 +1116,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
ei->i_block_group = group;
ei->i_last_alloc_group = ~0;

ext4_set_inode_flags(inode);
ext4_set_inode_flags(inode, true);
if (IS_DIRSYNC(inode))
ext4_handle_sync(handle);
if (insert_inode_locked(inode) < 0) {
Expand Down
26 changes: 20 additions & 6 deletions fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -4403,9 +4403,11 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
}

static bool ext4_should_use_dax(struct inode *inode)
static bool ext4_should_enable_dax(struct inode *inode)
{
if (!test_opt(inode->i_sb, DAX))
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

if (test_opt2(inode->i_sb, DAX_NEVER))
return false;
if (!S_ISREG(inode->i_mode))
return false;
Expand All @@ -4417,14 +4419,21 @@ static bool ext4_should_use_dax(struct inode *inode)
return false;
if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
return false;
return true;
if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags))
return false;
if (test_opt(inode->i_sb, DAX_ALWAYS))
return true;

return ext4_test_inode_flag(inode, EXT4_INODE_DAX);
}

void ext4_set_inode_flags(struct inode *inode)
void ext4_set_inode_flags(struct inode *inode, bool init)
{
unsigned int flags = EXT4_I(inode)->i_flags;
unsigned int new_fl = 0;

WARN_ON_ONCE(IS_DAX(inode) && init);

if (flags & EXT4_SYNC_FL)
new_fl |= S_SYNC;
if (flags & EXT4_APPEND_FL)
Expand All @@ -4435,8 +4444,13 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
if (ext4_should_use_dax(inode))

/* Because of the way inode_set_flags() works we must preserve S_DAX
* here if already set. */
new_fl |= (inode->i_flags & S_DAX);
if (init && ext4_should_enable_dax(inode))
new_fl |= S_DAX;

if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
if (flags & EXT4_CASEFOLD_FL)
Expand Down Expand Up @@ -4650,7 +4664,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
* not initialized on a new filesystem. */
}
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
ext4_set_inode_flags(inode);
ext4_set_inode_flags(inode, true);
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
if (ext4_has_feature_64bit(sb))
Expand Down
65 changes: 54 additions & 11 deletions fs/ext4/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,38 @@ static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid,
return 0;
}

static void ext4_dax_dontcache(struct inode *inode, unsigned int flags)
{
struct ext4_inode_info *ei = EXT4_I(inode);

if (S_ISDIR(inode->i_mode))
return;

if (test_opt2(inode->i_sb, DAX_NEVER) ||
test_opt(inode->i_sb, DAX_ALWAYS))
return;

if ((ei->i_flags ^ flags) & EXT4_DAX_FL)
d_mark_dontcache(inode);
}

/*
 * Decide whether a requested flag set may be combined with DAX.
 *
 * @inode:    inode being modified
 * @oldflags: flags currently set on the inode
 * @flags:    flags being requested
 *
 * Returns false when:
 *  - @flags contains EXT4_DAX_FL while @oldflags already carries one of the
 *    EXT4_DAX_MUT_EXCL flags, or while the inode is in the
 *    EXT4_STATE_VERITY_IN_PROGRESS state; or
 *  - @flags contains one of the EXT4_DAX_MUT_EXCL flags while @oldflags
 *    already has EXT4_DAX_FL.
 * Returns true otherwise.
 */
static bool dax_compatible(struct inode *inode, unsigned int oldflags,
			   unsigned int flags)
{
	/*
	 * DAX requested: reject if an exclusive flag is already present or
	 * verity is currently being enabled.  The inode state is only
	 * consulted when the cheaper flag test did not already decide.
	 */
	if ((flags & EXT4_DAX_FL) &&
	    ((oldflags & EXT4_DAX_MUT_EXCL) ||
	     ext4_test_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS)))
		return false;

	/* Exclusive flag requested on an inode that already has DAX set. */
	return !((flags & EXT4_DAX_MUT_EXCL) && (oldflags & EXT4_DAX_FL));
}

static int ext4_ioctl_setflags(struct inode *inode,
unsigned int flags)
{
Expand All @@ -300,7 +332,6 @@ static int ext4_ioctl_setflags(struct inode *inode,
int err = -EPERM, migrate = 0;
struct ext4_iloc iloc;
unsigned int oldflags, mask, i;
unsigned int jflag;
struct super_block *sb = inode->i_sb;

/* Is it quota file? Do not allow user to mess with it */
Expand All @@ -309,9 +340,6 @@ static int ext4_ioctl_setflags(struct inode *inode,

oldflags = ei->i_flags;

/* The JOURNAL_DATA flag is modifiable only by root */
jflag = flags & EXT4_JOURNAL_DATA_FL;

err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
if (err)
goto flags_out;
Expand All @@ -320,10 +348,16 @@ static int ext4_ioctl_setflags(struct inode *inode,
* The JOURNAL_DATA flag can only be changed by
* the relevant capability.
*/
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
if (!capable(CAP_SYS_RESOURCE))
goto flags_out;
}

if (!dax_compatible(inode, oldflags, flags)) {
err = -EOPNOTSUPP;
goto flags_out;
}

if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
migrate = 1;

Expand Down Expand Up @@ -369,6 +403,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
if (err)
goto flags_err;

ext4_dax_dontcache(inode, flags);

for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
if (!(mask & EXT4_FL_USER_MODIFIABLE))
continue;
Expand All @@ -381,7 +417,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
ext4_clear_inode_flag(inode, i);
}

ext4_set_inode_flags(inode);
ext4_set_inode_flags(inode, false);

inode->i_ctime = current_time(inode);

err = ext4_mark_iloc_dirty(handle, inode, &iloc);
Expand All @@ -390,17 +427,18 @@ static int ext4_ioctl_setflags(struct inode *inode,
if (err)
goto flags_out;

if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
/*
* Changes to the journaling mode can cause unsafe changes to
* S_DAX if we are using the DAX mount option.
* S_DAX if the inode is DAX
*/
if (test_opt(inode->i_sb, DAX)) {
if (IS_DAX(inode)) {
err = -EBUSY;
goto flags_out;
}

err = ext4_change_inode_journal_flag(inode, jflag);
err = ext4_change_inode_journal_flag(inode,
flags & EXT4_JOURNAL_DATA_FL);
if (err)
goto flags_out;
}
Expand Down Expand Up @@ -527,12 +565,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
xflags |= FS_XFLAG_NOATIME;
if (iflags & EXT4_PROJINHERIT_FL)
xflags |= FS_XFLAG_PROJINHERIT;
if (iflags & EXT4_DAX_FL)
xflags |= FS_XFLAG_DAX;
return xflags;
}

#define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \
FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \
FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT)
FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \
FS_XFLAG_DAX)

/* Transfer xflags flags to internal */
static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
Expand All @@ -551,6 +592,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
iflags |= EXT4_NOATIME_FL;
if (xflags & FS_XFLAG_PROJINHERIT)
iflags |= EXT4_PROJINHERIT_FL;
if (xflags & FS_XFLAG_DAX)
iflags |= EXT4_DAX_FL;

return iflags;
}
Expand Down
2 changes: 1 addition & 1 deletion fs/ext4/mballoc.c
Original file line number Diff line number Diff line change
Expand Up @@ -4708,7 +4708,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
}

ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
seq = *this_cpu_ptr(&discard_pa_seq);
seq = this_cpu_read(discard_pa_seq);
if (!ext4_mb_use_preallocated(ac)) {
ac->ac_op = EXT4_MB_HISTORY_ALLOC;
ext4_mb_normalize_request(ac, ar);
Expand Down
Loading

0 comments on commit 3be20b6

Please sign in to comment.