Skip to content

Commit

Permalink
ocfs2: fix data corruption by fallocate
Browse files Browse the repository at this point in the history
When fallocate punches holes out of inode size, if original isize is in
the middle of last cluster, then the part from isize to the end of the
cluster will be zeroed with buffer write, at that time isize is not yet
updated to match the new size, if writeback is kicked in, it will invoke
ocfs2_writepage()->block_write_full_page() where the pages out of inode
size will be dropped.  That will cause file corruption.  Fix this by
zero out eof blocks when extending the inode size.

Running the following command with qemu-image 4.2.1 can get a corrupted
coverted image file easily.

    qemu-img convert -p -t none -T none -f qcow2 $qcow_image \
             -O qcow2 -o compat=1.1 $qcow_image.conv

The usage of fallocate in qemu is like this, it first punches holes out
of inode size, then extend the inode size.

    fallocate(11, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 2276196352, 65536) = 0
    fallocate(11, 0, 2276196352, 65536) = 0

v1: https://www.spinics.net/lists/linux-fsdevel/msg193999.html
v2: https://lore.kernel.org/linux-fsdevel/[email protected]/T/

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Junxiao Bi <[email protected]>
Reviewed-by: Joseph Qi <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: Mark Fasheh <[email protected]>
Cc: Joel Becker <[email protected]>
Cc: Changwei Ge <[email protected]>
Cc: Gang He <[email protected]>
Cc: Jun Piao <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
biger410 authored and torvalds committed Jun 5, 2021
1 parent 415f0c8 commit 6bba447
Showing 1 changed file with 50 additions and 5 deletions.
55 changes: 50 additions & 5 deletions fs/ocfs2/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1855,6 +1855,45 @@ int ocfs2_remove_inode_range(struct inode *inode,
return ret;
}

/*
* zero out partial blocks of one cluster.
*
* start: file offset where zero starts, will be made upper block aligned.
* len: it will be trimmed to the end of current cluster if "start + len"
* is bigger than it.
*/
static int ocfs2_zeroout_partial_cluster(struct inode *inode,
u64 start, u64 len)
{
int ret;
u64 start_block, end_block, nr_blocks;
u64 p_block, offset;
u32 cluster, p_cluster, nr_clusters;
struct super_block *sb = inode->i_sb;
u64 end = ocfs2_align_bytes_to_clusters(sb, start);

if (start + len < end)
end = start + len;

start_block = ocfs2_blocks_for_bytes(sb, start);
end_block = ocfs2_blocks_for_bytes(sb, end);
nr_blocks = end_block - start_block;
if (!nr_blocks)
return 0;

cluster = ocfs2_bytes_to_clusters(sb, start);
ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
&nr_clusters, NULL);
if (ret)
return ret;
if (!p_cluster)
return 0;

offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
}

/*
* Parts of this function taken from xfs_change_file_space()
*/
Expand All @@ -1865,7 +1904,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
{
int ret;
s64 llen;
loff_t size;
loff_t size, orig_isize;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct buffer_head *di_bh = NULL;
handle_t *handle;
Expand Down Expand Up @@ -1896,14 +1935,15 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
goto out_inode_unlock;
}

orig_isize = i_size_read(inode);
switch (sr->l_whence) {
case 0: /*SEEK_SET*/
break;
case 1: /*SEEK_CUR*/
sr->l_start += f_pos;
break;
case 2: /*SEEK_END*/
sr->l_start += i_size_read(inode);
sr->l_start += orig_isize;
break;
default:
ret = -EINVAL;
Expand Down Expand Up @@ -1957,6 +1997,14 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
default:
ret = -EINVAL;
}

/* zeroout eof blocks in the cluster. */
if (!ret && change_size && orig_isize < size) {
ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,
size - orig_isize);
if (!ret)
i_size_write(inode, size);
}
up_write(&OCFS2_I(inode)->ip_alloc_sem);
if (ret) {
mlog_errno(ret);
Expand All @@ -1973,9 +2021,6 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
goto out_inode_unlock;
}

if (change_size && i_size_read(inode) < size)
i_size_write(inode, size);

inode->i_ctime = inode->i_mtime = current_time(inode);
ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
if (ret < 0)
Expand Down

0 comments on commit 6bba447

Please sign in to comment.