Skip to content

Commit

Permalink
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kern…
Browse files Browse the repository at this point in the history
…el/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Improve performance for ext4 by allowing multiple process to perform
  direct I/O writes to preallocated blocks by using a shared inode lock
  instead of taking an exclusive lock.

  In addition, multiple bug fixes and cleanups"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix incorrect options show of original mount_opt and extend mount_opt2
  ext4: Fix possible corruption when moving a directory
  ext4: init error handle resource before init group descriptors
  ext4: fix task hung in ext4_xattr_delete_inode
  jbd2: fix data missing when reusing bh which is ready to be checkpointed
  ext4: update s_journal_inum if it changes after journal replay
  ext4: fail ext4_iget if special inode unallocated
  ext4: fix function prototype mismatch for ext4_feat_ktype
  ext4: remove unnecessary variable initialization
  ext4: fix inode tree inconsistency caused by ENOMEM
  ext4: refuse to create ea block when umounted
  ext4: optimize ea_inode block expansion
  ext4: remove dead code in updating backup sb
  ext4: dio take shared inode lock when overwriting preallocated blocks
  ext4: don't show commit interval if it is zero
  ext4: use ext4_fc_tl_mem in fast-commit replay path
  ext4: improve xattr consistency checking and error reporting
  • Loading branch information
torvalds committed Feb 28, 2023
2 parents ae3419f + e3645d7 commit b07ce43
Show file tree
Hide file tree
Showing 10 changed files with 247 additions and 146 deletions.
1 change: 1 addition & 0 deletions fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -1529,6 +1529,7 @@ struct ext4_sb_info {
unsigned int s_mount_opt2;
unsigned long s_mount_flags;
unsigned int s_def_mount_opt;
unsigned int s_def_mount_opt2;
ext4_fsblk_t s_sb_block;
atomic64_t s_resv_clusters;
kuid_t s_resuid;
Expand Down
2 changes: 1 addition & 1 deletion fs/ext4/extents.c
Original file line number Diff line number Diff line change
Expand Up @@ -3251,7 +3251,7 @@ static int ext4_split_extent_at(handle_t *handle,
ext4_ext_mark_unwritten(ex2);

err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
if (err != -ENOSPC && err != -EDQUOT)
if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM)
goto out;

if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
Expand Down
44 changes: 26 additions & 18 deletions fs/ext4/fast_commit.c
Original file line number Diff line number Diff line change
Expand Up @@ -1332,8 +1332,14 @@ struct dentry_info_args {
char *dname;
};

/* Same as struct ext4_fc_tl, but uses native endianness fields */
struct ext4_fc_tl_mem {
u16 fc_tag;
u16 fc_len;
};

static inline void tl_to_darg(struct dentry_info_args *darg,
struct ext4_fc_tl *tl, u8 *val)
struct ext4_fc_tl_mem *tl, u8 *val)
{
struct ext4_fc_dentry_info fcd;

Expand All @@ -1345,16 +1351,18 @@ static inline void tl_to_darg(struct dentry_info_args *darg,
darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info);
}

static inline void ext4_fc_get_tl(struct ext4_fc_tl *tl, u8 *val)
static inline void ext4_fc_get_tl(struct ext4_fc_tl_mem *tl, u8 *val)
{
memcpy(tl, val, EXT4_FC_TAG_BASE_LEN);
tl->fc_len = le16_to_cpu(tl->fc_len);
tl->fc_tag = le16_to_cpu(tl->fc_tag);
struct ext4_fc_tl tl_disk;

memcpy(&tl_disk, val, EXT4_FC_TAG_BASE_LEN);
tl->fc_len = le16_to_cpu(tl_disk.fc_len);
tl->fc_tag = le16_to_cpu(tl_disk.fc_tag);
}

/* Unlink replay function */
static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
u8 *val)
static int ext4_fc_replay_unlink(struct super_block *sb,
struct ext4_fc_tl_mem *tl, u8 *val)
{
struct inode *inode, *old_parent;
struct qstr entry;
Expand Down Expand Up @@ -1451,8 +1459,8 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
}

/* Link replay function */
static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
u8 *val)
static int ext4_fc_replay_link(struct super_block *sb,
struct ext4_fc_tl_mem *tl, u8 *val)
{
struct inode *inode;
struct dentry_info_args darg;
Expand Down Expand Up @@ -1506,8 +1514,8 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
/*
* Inode replay function
*/
static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
u8 *val)
static int ext4_fc_replay_inode(struct super_block *sb,
struct ext4_fc_tl_mem *tl, u8 *val)
{
struct ext4_fc_inode fc_inode;
struct ext4_inode *raw_inode;
Expand Down Expand Up @@ -1609,8 +1617,8 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
* inode for which we are trying to create a dentry here, should already have
* been replayed before we start here.
*/
static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
u8 *val)
static int ext4_fc_replay_create(struct super_block *sb,
struct ext4_fc_tl_mem *tl, u8 *val)
{
int ret = 0;
struct inode *inode = NULL;
Expand Down Expand Up @@ -1708,7 +1716,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,

/* Replay add range tag */
static int ext4_fc_replay_add_range(struct super_block *sb,
struct ext4_fc_tl *tl, u8 *val)
struct ext4_fc_tl_mem *tl, u8 *val)
{
struct ext4_fc_add_range fc_add_ex;
struct ext4_extent newex, *ex;
Expand Down Expand Up @@ -1828,8 +1836,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,

/* Replay DEL_RANGE tag */
static int
ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
u8 *val)
ext4_fc_replay_del_range(struct super_block *sb,
struct ext4_fc_tl_mem *tl, u8 *val)
{
struct inode *inode;
struct ext4_fc_del_range lrange;
Expand Down Expand Up @@ -2025,7 +2033,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
struct ext4_fc_replay_state *state;
int ret = JBD2_FC_REPLAY_CONTINUE;
struct ext4_fc_add_range ext;
struct ext4_fc_tl tl;
struct ext4_fc_tl_mem tl;
struct ext4_fc_tail tail;
__u8 *start, *end, *cur, *val;
struct ext4_fc_head head;
Expand Down Expand Up @@ -2144,7 +2152,7 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
{
struct super_block *sb = journal->j_private;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_fc_tl tl;
struct ext4_fc_tl_mem tl;
__u8 *start, *end, *cur, *val;
int ret = JBD2_FC_REPLAY_CONTINUE;
struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
Expand Down
34 changes: 22 additions & 12 deletions fs/ext4/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,9 @@ ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
return false;
}

/* Is IO overwriting allocated and initialized blocks? */
static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
/* Is IO overwriting allocated or initialized blocks? */
static bool ext4_overwrite_io(struct inode *inode,
loff_t pos, loff_t len, bool *unwritten)
{
struct ext4_map_blocks map;
unsigned int blkbits = inode->i_blkbits;
Expand All @@ -217,12 +218,15 @@ static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
blklen = map.m_len;

err = ext4_map_blocks(NULL, inode, &map, 0);
if (err != blklen)
return false;
/*
* 'err==len' means that all of the blocks have been preallocated,
* regardless of whether they have been initialized or not. To exclude
* unwritten extents, we need to check m_flags.
* regardless of whether they have been initialized or not. We need to
* check m_flags to distinguish the unwritten extents.
*/
return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
*unwritten = !(map.m_flags & EXT4_MAP_MAPPED);
return true;
}

static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
Expand Down Expand Up @@ -431,11 +435,16 @@ static const struct iomap_dio_ops ext4_dio_write_ops = {
* - For extending writes case we don't take the shared lock, since it requires
* updating inode i_disksize and/or orphan handling with exclusive lock.
*
* - shared locking will only be true mostly with overwrites. Otherwise we will
* switch to exclusive i_rwsem lock.
* - shared locking will only be true mostly with overwrites, including
* initialized blocks and unwritten blocks. For overwrite unwritten blocks
* we protect splitting extents by i_data_sem in ext4_inode_info, so we can
* also release exclusive i_rwsem lock.
*
* - Otherwise we will switch to exclusive i_rwsem lock.
*/
static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
bool *ilock_shared, bool *extend)
bool *ilock_shared, bool *extend,
bool *unwritten)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
Expand All @@ -459,7 +468,7 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
* in file_modified().
*/
if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
!ext4_overwrite_io(inode, offset, count))) {
!ext4_overwrite_io(inode, offset, count, unwritten))) {
if (iocb->ki_flags & IOCB_NOWAIT) {
ret = -EAGAIN;
goto out;
Expand Down Expand Up @@ -491,7 +500,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t offset = iocb->ki_pos;
size_t count = iov_iter_count(from);
const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
bool extend = false, unaligned_io = false;
bool extend = false, unaligned_io = false, unwritten = false;
bool ilock_shared = true;

/*
Expand Down Expand Up @@ -534,7 +543,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ext4_buffered_write_iter(iocb, from);
}

ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
ret = ext4_dio_write_checks(iocb, from,
&ilock_shared, &extend, &unwritten);
if (ret <= 0)
return ret;

Expand Down Expand Up @@ -582,7 +592,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
ext4_journal_stop(handle);
}

if (ilock_shared)
if (ilock_shared && !unwritten)
iomap_ops = &ext4_iomap_overwrite_ops;
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
(unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0,
Expand Down
20 changes: 9 additions & 11 deletions fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -4872,13 +4872,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
goto bad_inode;
raw_inode = ext4_raw_inode(&iloc);

if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
ext4_error_inode(inode, function, line, 0,
"iget: root inode unallocated");
ret = -EFSCORRUPTED;
goto bad_inode;
}

if ((flags & EXT4_IGET_HANDLE) &&
(raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
ret = -ESTALE;
Expand Down Expand Up @@ -4951,11 +4944,16 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
* NeilBrown 1999oct15
*/
if (inode->i_nlink == 0) {
if ((inode->i_mode == 0 ||
if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL ||
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
ino != EXT4_BOOT_LOADER_INO) {
/* this inode is deleted */
ret = -ESTALE;
/* this inode is deleted or unallocated */
if (flags & EXT4_IGET_SPECIAL) {
ext4_error_inode(inode, function, line, 0,
"iget: special inode unallocated");
ret = -EFSCORRUPTED;
} else
ret = -ESTALE;
goto bad_inode;
}
/* The only unlinked inodes we let through here have
Expand Down Expand Up @@ -5788,7 +5786,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
int gdpblocks;
int idxblocks;
int ret = 0;
int ret;

/*
* How many index blocks need to touch to map @lblocks logical blocks
Expand Down
3 changes: 0 additions & 3 deletions fs/ext4/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,6 @@ static int ext4_update_backup_sb(struct super_block *sb,
set_buffer_uptodate(bh);
unlock_buffer(bh);

if (err)
goto out_bh;

if (handle) {
err = ext4_handle_dirty_metadata(handle, NULL, bh);
if (err)
Expand Down
11 changes: 10 additions & 1 deletion fs/ext4/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -3872,9 +3872,16 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
goto end_rename;
}
/*
* We need to protect against old.inode directory getting
* converted from inline directory format into a normal one.
*/
inode_lock_nested(old.inode, I_MUTEX_NONDIR2);
retval = ext4_rename_dir_prepare(handle, &old);
if (retval)
if (retval) {
inode_unlock(old.inode);
goto end_rename;
}
}
/*
* If we're renaming a file within an inline_data dir and adding or
Expand Down Expand Up @@ -4006,6 +4013,8 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
} else {
ext4_journal_stop(handle);
}
if (old.dir_bh)
inode_unlock(old.inode);
release_bh:
brelse(old.dir_bh);
brelse(old.bh);
Expand Down
Loading

0 comments on commit b07ce43

Please sign in to comment.