Skip to content

Commit

Permalink
GFS2: rewrite fallocate code to write blocks directly
Browse files Browse the repository at this point in the history
GFS2's fallocate code currently goes through the page cache. Since it's only
writing to the end of the file or to holes in it, it doesn't need to, and it
was causing issues on low memory environments. This patch pulls in some of
Steve's block allocation work, and uses it to simply allocate the blocks for
the file, and zero them out at allocation time.  It provides a slight
performance increase, and it dramatically simplifies the code.

Signed-off-by: Benjamin Marzinski <[email protected]>
Signed-off-by: Steven Whitehouse <[email protected]>
  • Loading branch information
bmarzins authored and swhiteho committed Oct 21, 2011
1 parent bd5437a commit 64dd153
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 147 deletions.
12 changes: 12 additions & 0 deletions fs/gfs2/bmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>

Expand Down Expand Up @@ -427,12 +428,14 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct super_block *sb = sdp->sd_vfs;
struct buffer_head *dibh = mp->mp_bh[0];
u64 bn, dblock = 0;
unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
unsigned dblks = 0;
unsigned ptrs_per_blk;
const unsigned end_of_metadata = height - 1;
int ret;
int eob = 0;
enum alloc_state state;
__be64 *ptr;
Expand Down Expand Up @@ -535,6 +538,15 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
dblock = bn;
while (n-- > 0)
*ptr++ = cpu_to_be64(bn++);
if (buffer_zeronew(bh_map)) {
ret = sb_issue_zeroout(sb, dblock, dblks,
GFP_NOFS);
if (ret) {
fs_err(sdp,
"Failed to zero data buffers\n");
clear_buffer_zeronew(bh_map);
}
}
break;
}
} while ((state != ALLOC_DATA) || !dblock);
Expand Down
171 changes: 24 additions & 147 deletions fs/gfs2/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -669,135 +669,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
return generic_file_aio_write(iocb, iov, nr_segs, pos);
}

static int empty_write_end(struct page *page, unsigned from,
unsigned to, int mode)
{
struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
struct buffer_head *bh;
unsigned offset, blksize = 1 << inode->i_blkbits;
pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;

zero_user(page, from, to-from);
mark_page_accessed(page);

if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
if (!gfs2_is_writeback(ip))
gfs2_page_add_databufs(ip, page, from, to);

block_commit_write(page, from, to);
return 0;
}

offset = 0;
bh = page_buffers(page);
while (offset < to) {
if (offset >= from) {
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
clear_buffer_new(bh);
write_dirty_buffer(bh, WRITE);
}
offset += blksize;
bh = bh->b_this_page;
}

offset = 0;
bh = page_buffers(page);
while (offset < to) {
if (offset >= from) {
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
return -EIO;
}
offset += blksize;
bh = bh->b_this_page;
}
return 0;
}

static int needs_empty_write(sector_t block, struct inode *inode)
{
int error;
struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };

bh_map.b_size = 1 << inode->i_blkbits;
error = gfs2_block_map(inode, block, &bh_map, 0);
if (unlikely(error))
return error;
return !buffer_mapped(&bh_map);
}

static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
int mode)
{
struct inode *inode = page->mapping->host;
unsigned start, end, next, blksize;
sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
int ret;

blksize = 1 << inode->i_blkbits;
next = end = 0;
while (next < from) {
next += blksize;
block++;
}
start = next;
do {
next += blksize;
ret = needs_empty_write(block, inode);
if (unlikely(ret < 0))
return ret;
if (ret == 0) {
if (end) {
ret = __block_write_begin(page, start, end - start,
gfs2_block_map);
if (unlikely(ret))
return ret;
ret = empty_write_end(page, start, end, mode);
if (unlikely(ret))
return ret;
end = 0;
}
start = next;
}
else
end = next;
block++;
} while (next < to);

if (end) {
ret = __block_write_begin(page, start, end - start, gfs2_block_map);
if (unlikely(ret))
return ret;
ret = empty_write_end(page, start, end, mode);
if (unlikely(ret))
return ret;
}

return 0;
}

static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
int mode)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct buffer_head *dibh;
int error;
u64 start = offset >> PAGE_CACHE_SHIFT;
unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
pgoff_t curr;
struct page *page;
unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
unsigned int from, to;

if (!end_offset)
end_offset = PAGE_CACHE_SIZE;
unsigned int nr_blks;
sector_t lblock = offset >> inode->i_blkbits;

error = gfs2_meta_inode_buffer(ip, &dibh);
if (unlikely(error))
goto out;
return error;

gfs2_trans_add_bh(ip->i_gl, dibh, 1);

Expand All @@ -807,39 +690,31 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
goto out;
}

curr = start;
offset = start << PAGE_CACHE_SHIFT;
from = start_offset;
to = PAGE_CACHE_SIZE;
while (curr <= end) {
page = grab_cache_page_write_begin(inode->i_mapping, curr,
AOP_FLAG_NOFS);
if (unlikely(!page)) {
error = -ENOMEM;
goto out;
}
while (len) {
struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
bh_map.b_size = len;
set_buffer_zeronew(&bh_map);

if (curr == end)
to = end_offset;
error = write_empty_blocks(page, from, to, mode);
if (!error && offset + to > inode->i_size &&
!(mode & FALLOC_FL_KEEP_SIZE)) {
i_size_write(inode, offset + to);
}
unlock_page(page);
page_cache_release(page);
if (error)
error = gfs2_block_map(inode, lblock, &bh_map, 1);
if (unlikely(error))
goto out;
curr++;
offset += PAGE_CACHE_SIZE;
from = 0;
len -= bh_map.b_size;
nr_blks = bh_map.b_size >> inode->i_blkbits;
lblock += nr_blks;
if (!buffer_new(&bh_map))
continue;
if (unlikely(!buffer_zeronew(&bh_map))) {
error = -EIO;
goto out;
}
}
if (offset + len > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
i_size_write(inode, offset + len);

mark_inode_dirty(inode);

brelse(dibh);

out:
brelse(dibh);
return error;
}

Expand Down Expand Up @@ -879,6 +754,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
int error;
loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
loff_t max_chunk_size = UINT_MAX & bsize_mask;
next = (next + 1) << sdp->sd_sb.sb_bsize_shift;

/* We only support the FALLOC_FL_KEEP_SIZE mode */
Expand Down Expand Up @@ -932,7 +808,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
goto out_qunlock;
}
max_bytes = bytes;
calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
&max_bytes, &data_blocks, &ind_blocks);
al->al_requested = data_blocks + ind_blocks;

rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
Expand Down
3 changes: 3 additions & 0 deletions fs/gfs2/incore.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,15 @@ struct gfs2_rgrpd {
enum gfs2_state_bits {
BH_Pinned = BH_PrivateStart,
BH_Escaped = BH_PrivateStart + 1,
BH_Zeronew = BH_PrivateStart + 2,
};

BUFFER_FNS(Pinned, pinned)
TAS_BUFFER_FNS(Pinned, pinned)
BUFFER_FNS(Escaped, escaped)
TAS_BUFFER_FNS(Escaped, escaped)
BUFFER_FNS(Zeronew, zeronew)
TAS_BUFFER_FNS(Zeronew, zeronew)

struct gfs2_bufdata {
struct buffer_head *bd_bh;
Expand Down

0 comments on commit 64dd153

Please sign in to comment.