Skip to content

Commit

Permalink
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel…
Browse files Browse the repository at this point in the history
…/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (64 commits)
  ext4: Update documentation about quota mount options
  ext4: replace MAX_DEFRAG_SIZE with EXT_MAX_BLOCK
  ext4: Fix the alloc on close after a truncate hueristic
  ext4: Add a tracepoint for ext4_alloc_da_blocks()
  ext4: store EXT4_EXT_MIGRATE in i_state instead of i_flags
  ext4: limit block allocations for indirect-block files to < 2^32
  ext4: Fix different block exchange issue in EXT4_IOC_MOVE_EXT
  ext4: Add null extent check to ext_get_path
  ext4: Replace BUG_ON() with ext4_error() in move_extents.c
  ext4: Replace get_ext_path macro with an inline funciton
  ext4: Fix include/trace/events/ext4.h to work with Systemtap
  ext4: Fix initalization of s_flex_groups
  ext4: Always set dx_node's fake_dirent explicitly.
  ext4: Fix async commit mode to be safe by using a barrier
  ext4: Don't update superblock write time when filesystem is read-only
  ext4: Clarify the locking details in mballoc
  ext4: check for need init flag in ext4_mb_load_buddy
  ext4: move ext4_mb_init_group() function earlier in the mballoc.c
  ext4: Make non-journal fsync work properly
  ext4: Assure that metadata blocks are written during fsync in no journal mode
  ...
  • Loading branch information
torvalds committed Sep 18, 2009
2 parents 6952b61 + 1358870 commit 3530c18
Show file tree
Hide file tree
Showing 25 changed files with 1,000 additions and 601 deletions.
24 changes: 13 additions & 11 deletions Documentation/filesystems/ext4.txt
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,9 @@ ro Mount filesystem read only. Note that ext4 will
mount options "ro,noload" can be used to prevent
writes to the filesystem.

journal_checksum Enable checksumming of the journal transactions.
This will allow the recovery code in e2fsck and the
kernel to detect corruption in the kernel. It is a
compatible change and will be ignored by older kernels.

journal_async_commit Commit block can be written to disk without waiting
for descriptor blocks. If enabled older kernels cannot
mount the device. This will enable 'journal_checksum'
internally.
mount the device.

journal=update Update the ext4 file system's journal to the current
format.
Expand Down Expand Up @@ -263,10 +257,18 @@ resuid=n The user ID which may use the reserved blocks.

sb=n Use alternate superblock at this location.

quota
noquota
grpquota
usrquota
quota These options are ignored by the filesystem. They
noquota are used only by quota tools to recognize volumes
grpquota where quota should be turned on. See documentation
usrquota in the quota-tools package for more details
(http://sourceforge.net/projects/linuxquota).

jqfmt=<quota type> These options tell the filesystem details about quota
usrjquota=<file> so that quota information can be properly updated
grpjquota=<file> during journal replay. They replace the above
quota options. See documentation in the quota-tools
package for more details
(http://sourceforge.net/projects/linuxquota).

bh (*) ext4 associates buffer heads to data pages to
nobh (a) cache disk block mapping information
Expand Down
11 changes: 10 additions & 1 deletion fs/ext4/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ config EXT4DEV_COMPAT

To enable backwards compatibility so that systems that are
still expecting to mount ext4 filesystems using ext4dev,
chose Y here. This feature will go away by 2.6.31, so
choose Y here. This feature will go away by 2.6.31, so
please arrange to get your userspace programs fixed!

config EXT4_FS_XATTR
Expand Down Expand Up @@ -77,3 +77,12 @@ config EXT4_FS_SECURITY

If you are not using a security module that requires using
extended attributes for file security labels, say N.

config EXT4_DEBUG
bool "EXT4 debugging support"
depends on EXT4_FS
help
Enables run-time debugging support for the ext4 filesystem.

If you select Y here, then you will be able to turn on debugging
with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug"
2 changes: 1 addition & 1 deletion fs/ext4/balloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
* new bitmap information
*/
set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
ext4_mb_update_group_info(grp, blocks_freed);
grp->bb_free += blocks_freed;
up_write(&grp->alloc_sem);

/* We dirtied the bitmap block */
Expand Down
91 changes: 70 additions & 21 deletions fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,27 +67,29 @@ typedef unsigned int ext4_group_t;


/*
 * EXT4_MB_* allocation hint flags.  Every value is a distinct single bit,
 * so the flags can be combined with bitwise OR.  The values are written in
 * hex so the bit positions are easy to read; each flag is defined exactly
 * once.
 */
/* prefer goal again. length */
#define EXT4_MB_HINT_MERGE		0x0001
/* blocks already reserved */
#define EXT4_MB_HINT_RESERVED		0x0002
/* metadata is being allocated */
#define EXT4_MB_HINT_METADATA		0x0004
/* first blocks in the file */
#define EXT4_MB_HINT_FIRST		0x0008
/* search for the best chunk */
#define EXT4_MB_HINT_BEST		0x0010
/* data is being allocated */
#define EXT4_MB_HINT_DATA		0x0020
/* don't preallocate (for tails) */
#define EXT4_MB_HINT_NOPREALLOC		0x0040
/* allocate for locality group */
#define EXT4_MB_HINT_GROUP_ALLOC	0x0080
/* allocate goal blocks or none */
#define EXT4_MB_HINT_GOAL_ONLY		0x0100
/* goal is meaningful */
#define EXT4_MB_HINT_TRY_GOAL		0x0200
/* blocks already pre-reserved by delayed allocation */
#define EXT4_MB_DELALLOC_RESERVED	0x0400
/* We are doing stream allocation */
#define EXT4_MB_STREAM_ALLOC		0x0800


struct ext4_allocation_request {
Expand All @@ -111,6 +113,21 @@ struct ext4_allocation_request {
unsigned int flags;
};

/*
 * State tracked for delayed allocation: one extent (start block, size and
 * state bits) together with the range of pages it covers, plus the
 * writeback control and result fields used while processing it.
 */
struct mpage_da_data {
	struct inode *inode;

	/* the extent */
	sector_t b_blocknr;		/* start block number of extent */
	size_t b_size;			/* size of extent */
	unsigned long b_state;		/* state of the extent */

	/* extent of pages */
	unsigned long first_page;
	unsigned long next_page;

	struct writeback_control *wbc;
	int io_done;
	int pages_written;
	int retval;
};

/*
* Special inodes numbers
*/
Expand Down Expand Up @@ -251,7 +268,6 @@ struct flex_groups {
#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */

#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
Expand Down Expand Up @@ -289,6 +305,7 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */
#define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */

/* Used to pass group descriptor data when online resize is done */
struct ext4_new_group_input {
Expand Down Expand Up @@ -386,6 +403,9 @@ struct ext4_mount_options {
#endif
};

/* Max physical block we can address w/o extents */
#define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF

/*
* Structure of an inode on the disk
*/
Expand Down Expand Up @@ -456,7 +476,6 @@ struct move_extent {
__u64 len; /* block length to be moved */
__u64 moved_len; /* moved block length */
};
#define MAX_DEFRAG_SIZE ((1UL<<31) - 1)

#define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
Expand Down Expand Up @@ -694,7 +713,6 @@ struct ext4_inode_info {
#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
Expand Down Expand Up @@ -841,6 +859,7 @@ struct ext4_sb_info {
unsigned long s_gdb_count; /* Number of group descriptor blocks */
unsigned long s_desc_per_block; /* Number of group descriptors per block */
ext4_group_t s_groups_count; /* Number of groups in the fs */
ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
unsigned long s_overhead_last; /* Last calculated overhead */
unsigned long s_blocks_last; /* Last seen block count */
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
Expand Down Expand Up @@ -950,6 +969,7 @@ struct ext4_sb_info {
atomic_t s_mb_lost_chunks;
atomic_t s_mb_preallocated;
atomic_t s_mb_discarded;
atomic_t s_lock_busy;

/* locality groups */
struct ext4_locality_group *s_locality_groups;
Expand Down Expand Up @@ -1340,8 +1360,6 @@ extern void ext4_mb_free_blocks(handle_t *, struct inode *,
ext4_fsblk_t, unsigned long, int, unsigned long *);
extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
ext4_grpblk_t add);
extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
ext4_group_t, int);
Expand All @@ -1367,6 +1385,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern int ext4_can_truncate(struct inode *inode);
extern void ext4_truncate(struct inode *);
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern int ext4_alloc_da_blocks(struct inode *inode);
Expand Down Expand Up @@ -1575,31 +1594,61 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
/*
 * Free-space bookkeeping kept for one group: the free block count, the
 * first free block, the number of free-space fragments and a per-order
 * table of free power-of-two regions.
 *
 * The counters use ext4_grpblk_t; each member is declared exactly once.
 */
struct ext4_group_info {
	unsigned long	bb_state;	/* flag bits, see
					 * EXT4_GROUP_INFO_NEED_INIT_BIT */
	struct rb_root	bb_free_root;
	ext4_grpblk_t	bb_first_free;	/* first free block */
	ext4_grpblk_t	bb_free;	/* total free blocks */
	ext4_grpblk_t	bb_fragments;	/* nr of freespace fragments */
	struct list_head bb_prealloc_list;
#ifdef DOUBLE_CHECK
	void		*bb_bitmap;	/* only present with DOUBLE_CHECK */
#endif
	struct rw_semaphore alloc_sem;
	ext4_grpblk_t	bb_counters[];	/* Nr of free power-of-two-block
					 * regions, index is order.
					 * bb_counters[3] = 5 means
					 * 5 free 8-block regions. */
};

#define EXT4_GROUP_INFO_NEED_INIT_BIT 0

#define EXT4_MB_GRP_NEED_INIT(grp) \
(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))

#define EXT4_MAX_CONTENTION 8
#define EXT4_CONTENTION_THRESHOLD 2

/*
 * Return the spinlock that bgl_lock_ptr() selects for @group out of this
 * superblock's s_blockgroup_lock.
 */
static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
					      ext4_group_t group)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	return bgl_lock_ptr(sbi->s_blockgroup_lock, group);
}

/*
 * Tell whether attempts to take the block group locks have been running
 * into contention: returns non-zero when the s_lock_busy counter is
 * strictly above EXT4_CONTENTION_THRESHOLD, zero otherwise.
 */
static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
{
	int contention = atomic_read(&sbi->s_lock_busy);

	return contention > EXT4_CONTENTION_THRESHOLD;
}

/*
 * Acquire the spinlock for @group and, as a side effect, maintain
 * EXT4_SB(sb)->s_lock_busy as a rough contention gauge:
 *
 *  - if the lock is free, it is taken via spin_trylock() and the counter
 *    is decremented unless it is already 0;
 *  - otherwise the counter is incremented unless it has already reached
 *    EXT4_MAX_CONTENTION, and the caller then waits in spin_lock().
 *
 * The lock is acquired exactly once on either path; taking it
 * unconditionally before the trylock would make the trylock always fail
 * and the following spin_lock() spin on a lock the caller already holds.
 */
static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
{
	spinlock_t *lock = ext4_group_lock_ptr(sb, group);

	if (spin_trylock(lock)) {
		/*
		 * We're able to grab the lock right away, so drop the
		 * lock contention counter.
		 */
		atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
	} else {
		/*
		 * The lock is busy, so bump the contention counter,
		 * and then wait on the spin lock.
		 */
		atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
				  EXT4_MAX_CONTENTION);
		spin_lock(lock);
	}
}

static inline void ext4_unlock_group(struct super_block *sb,
Expand Down
4 changes: 2 additions & 2 deletions fs/ext4/ext4_extents.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@
#define CHECK_BINSEARCH__

/*
* If EXT_DEBUG is defined you can use the 'extdebug' mount option
* to get lots of info about what's going on.
* Turn on EXT_DEBUG to get lots of info about extents operations.
*/
#define EXT_DEBUG__
#ifdef EXT_DEBUG
Expand Down Expand Up @@ -138,6 +137,7 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
#define EXT_BREAK 1
#define EXT_REPEAT 2

/* Maximum logical block in a file; ext4_extent's ee_block is __le32 */
#define EXT_MAX_BLOCK 0xffffffff

/*
Expand Down
9 changes: 6 additions & 3 deletions fs/ext4/ext4_jbd2.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
handle, err);
}
else
brelse(bh);
bforget(bh);
return err;
}

Expand All @@ -60,7 +60,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
handle, err);
}
else
brelse(bh);
bforget(bh);
return err;
}

Expand Down Expand Up @@ -89,7 +89,10 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
ext4_journal_abort_handle(where, __func__, bh,
handle, err);
} else {
mark_buffer_dirty(bh);
if (inode && bh)
mark_buffer_dirty_inode(bh, inode);
else
mark_buffer_dirty(bh);
if (inode && inode_needs_sync(inode)) {
sync_dirty_buffer(bh);
if (buffer_req(bh) && !buffer_uptodate(bh)) {
Expand Down
Loading

0 comments on commit 3530c18

Please sign in to comment.