Skip to content

Commit

Permalink
ext4: add prefetch_block_bitmaps mount option
Browse files Browse the repository at this point in the history
For file systems where we can afford to keep the buddy bitmaps cached,
we can speed up initial writes to large file systems by starting to
load the block allocation bitmaps as soon as the file system is
mounted.  This won't work well for _super_ large file systems, or
memory constrained systems, so we only enable this when it is
requested via a mount option.

Addresses-Google-Bug: 159488342
Signed-off-by: Theodore Ts'o <[email protected]>
Reviewed-by: Andreas Dilger <[email protected]>
  • Loading branch information
tytso committed Aug 7, 2020
1 parent ab74c7b commit 3d392b2
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 23 deletions.
15 changes: 14 additions & 1 deletion fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -1188,6 +1188,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */
#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
Expand Down Expand Up @@ -2334,9 +2335,15 @@ struct ext4_lazy_init {
struct mutex li_list_mtx;
};

enum ext4_li_mode {
EXT4_LI_MODE_PREFETCH_BBITMAP,
EXT4_LI_MODE_ITABLE,
};

struct ext4_li_request {
struct super_block *lr_super;
struct ext4_sb_info *lr_sbi;
enum ext4_li_mode lr_mode;
ext4_group_t lr_first_not_zeroed;
ext4_group_t lr_next_group;
struct list_head lr_request;
unsigned long lr_next_sched;
Expand Down Expand Up @@ -2676,6 +2683,12 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *);
extern int __init ext4_init_mballoc(void);
extern void ext4_exit_mballoc(void);
extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
ext4_group_t group,
unsigned int nr, int *cnt);
extern void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
unsigned int nr);

extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block,
unsigned long count, int flags);
Expand Down
10 changes: 4 additions & 6 deletions fs/ext4/mballoc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2232,9 +2232,8 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
* Start prefetching @nr block bitmaps starting at @group.
* Return the next group which needs to be prefetched.
*/
static ext4_group_t
ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
unsigned int nr, int *cnt)
ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
unsigned int nr, int *cnt)
{
ext4_group_t ngroups = ext4_get_groups_count(sb);
struct buffer_head *bh;
Expand Down Expand Up @@ -2284,9 +2283,8 @@ ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
* waiting for the block allocation bitmap read to finish when
* ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator().
*/
static void
ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
unsigned int nr)
void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
unsigned int nr)
{
while (nr-- > 0) {
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
Expand Down
57 changes: 41 additions & 16 deletions fs/ext4/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -1521,6 +1521,7 @@ enum {
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
Opt_prefetch_block_bitmaps,
};

static const match_table_t tokens = {
Expand Down Expand Up @@ -1612,6 +1613,7 @@ static const match_table_t tokens = {
{Opt_test_dummy_encryption, "test_dummy_encryption"},
{Opt_nombcache, "nombcache"},
{Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
{Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"},
{Opt_removed, "check=none"}, /* mount option from ext2/3 */
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
{Opt_removed, "reservation"}, /* mount option from ext2/3 */
Expand Down Expand Up @@ -1829,6 +1831,8 @@ static const struct mount_opts {
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
{Opt_test_dummy_encryption, 0, MOPT_STRING},
{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
{Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
MOPT_SET},
{Opt_err, 0, 0}
};

Expand Down Expand Up @@ -3201,15 +3205,34 @@ static void print_daily_error_info(struct timer_list *t)
static int ext4_run_li_request(struct ext4_li_request *elr)
{
struct ext4_group_desc *gdp = NULL;
ext4_group_t group, ngroups;
struct super_block *sb;
struct super_block *sb = elr->lr_super;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
ext4_group_t group = elr->lr_next_group;
unsigned long timeout = 0;
unsigned int prefetch_ios = 0;
int ret = 0;

sb = elr->lr_super;
ngroups = EXT4_SB(sb)->s_groups_count;
if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
elr->lr_next_group = ext4_mb_prefetch(sb, group,
EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
if (prefetch_ios)
ext4_mb_prefetch_fini(sb, elr->lr_next_group,
prefetch_ios);
trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
prefetch_ios);
if (group >= elr->lr_next_group) {
ret = 1;
if (elr->lr_first_not_zeroed != ngroups &&
!sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
elr->lr_next_group = elr->lr_first_not_zeroed;
elr->lr_mode = EXT4_LI_MODE_ITABLE;
ret = 0;
}
}
return ret;
}

for (group = elr->lr_next_group; group < ngroups; group++) {
for (; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp) {
ret = 1;
Expand All @@ -3227,9 +3250,10 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
timeout = jiffies;
ret = ext4_init_inode_table(sb, group,
elr->lr_timeout ? 0 : 1);
trace_ext4_lazy_itable_init(sb, group);
if (elr->lr_timeout == 0) {
timeout = (jiffies - timeout) *
elr->lr_sbi->s_li_wait_mult;
EXT4_SB(elr->lr_super)->s_li_wait_mult;
elr->lr_timeout = timeout;
}
elr->lr_next_sched = jiffies + elr->lr_timeout;
Expand All @@ -3244,15 +3268,11 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
*/
static void ext4_remove_li_request(struct ext4_li_request *elr)
{
struct ext4_sb_info *sbi;

if (!elr)
return;

sbi = elr->lr_sbi;

list_del(&elr->lr_request);
sbi->s_li_request = NULL;
EXT4_SB(elr->lr_super)->s_li_request = NULL;
kfree(elr);
}

Expand Down Expand Up @@ -3461,16 +3481,20 @@ static int ext4_li_info_new(void)
static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
ext4_group_t start)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_li_request *elr;

elr = kzalloc(sizeof(*elr), GFP_KERNEL);
if (!elr)
return NULL;

elr->lr_super = sb;
elr->lr_sbi = sbi;
elr->lr_next_group = start;
elr->lr_first_not_zeroed = start;
if (test_opt(sb, PREFETCH_BLOCK_BITMAPS))
elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
else {
elr->lr_mode = EXT4_LI_MODE_ITABLE;
elr->lr_next_group = start;
}

/*
* Randomize first schedule time of the request to
Expand Down Expand Up @@ -3500,8 +3524,9 @@ int ext4_register_li_request(struct super_block *sb,
goto out;
}

if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
!test_opt(sb, INIT_INODE_TABLE))
if (!test_opt(sb, PREFETCH_BLOCK_BITMAPS) &&
(first_not_zeroed == ngroups || sb_rdonly(sb) ||
!test_opt(sb, INIT_INODE_TABLE)))
goto out;

elr = ext4_li_request_new(sb, first_not_zeroed);
Expand Down
44 changes: 44 additions & 0 deletions include/trace/events/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -2742,6 +2742,50 @@ TRACE_EVENT(ext4_error,
__entry->function, __entry->line)
);

TRACE_EVENT(ext4_prefetch_bitmaps,
TP_PROTO(struct super_block *sb, ext4_group_t group,
ext4_group_t next, unsigned int prefetch_ios),

TP_ARGS(sb, group, next, prefetch_ios),

TP_STRUCT__entry(
__field( dev_t, dev )
__field( __u32, group )
__field( __u32, next )
__field( __u32, ios )
),

TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->group = group;
__entry->next = next;
__entry->ios = prefetch_ios;
),

TP_printk("dev %d,%d group %u next %u ios %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->group, __entry->next, __entry->ios)
);

TRACE_EVENT(ext4_lazy_itable_init,
TP_PROTO(struct super_block *sb, ext4_group_t group),

TP_ARGS(sb, group),

TP_STRUCT__entry(
__field( dev_t, dev )
__field( __u32, group )
),

TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->group = group;
),

TP_printk("dev %d,%d group %u",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group)
);

#endif /* _TRACE_EXT4_H */

/* This part must be outside protection */
Expand Down

0 comments on commit 3d392b2

Please sign in to comment.