Skip to content

Commit

Permalink
block: Do not discard buffers under a mounted filesystem
Browse files Browse the repository at this point in the history
Discarding blocks and buffers under a mounted filesystem is hardly
something an admin wants to do. Usually it will confuse the filesystem, and
sometimes the loss of buffer_head state (including the b_private field) can
even cause crashes like:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
PGD 0 P4D 0
Oops: 0002 [#1] SMP PTI
CPU: 4 PID: 203778 Comm: jbd2/dm-3-8 Kdump: loaded Tainted: G O     --------- -  - 4.18.0-147.5.0.5.h126.eulerosv2r9.x86_64 #1
Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 1.57 08/11/2015
RIP: 0010:jbd2_journal_grab_journal_head+0x1b/0x40 [jbd2]
...
Call Trace:
 __jbd2_journal_insert_checkpoint+0x23/0x70 [jbd2]
 jbd2_journal_commit_transaction+0x155f/0x1b60 [jbd2]
 kjournald2+0xbd/0x270 [jbd2]

So if we don't have the block device open with O_EXCL already, claim the
block device while we truncate the buffer cache. This makes sure that no
exclusive block device user (such as a filesystem) can operate on the
device while we are discarding the buffer cache.

Reported-by: Ye Bin <[email protected]>
Signed-off-by: Jan Kara <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
[axboe: fix !CONFIG_BLOCK error in truncate_bdev_range()]
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
jankara authored and axboe committed Sep 8, 2020
1 parent 6dbf7bb commit 384d87e
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 10 deletions.
16 changes: 10 additions & 6 deletions block/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
uint64_t range[2];
uint64_t start, len;
struct request_queue *q = bdev_get_queue(bdev);
struct address_space *mapping = bdev->bd_inode->i_mapping;

int err;

if (!(mode & FMODE_WRITE))
return -EBADF;
Expand All @@ -134,7 +133,11 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,

if (start + len > i_size_read(bdev->bd_inode))
return -EINVAL;
truncate_inode_pages_range(mapping, start, start + len - 1);

err = truncate_bdev_range(bdev, mode, start, start + len - 1);
if (err)
return err;

return blkdev_issue_discard(bdev, start >> 9, len >> 9,
GFP_KERNEL, flags);
}
Expand All @@ -143,8 +146,8 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
unsigned long arg)
{
uint64_t range[2];
struct address_space *mapping;
uint64_t start, end, len;
int err;

if (!(mode & FMODE_WRITE))
return -EBADF;
Expand All @@ -166,8 +169,9 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
return -EINVAL;

/* Invalidate the page cache, including dirty pages */
mapping = bdev->bd_inode->i_mapping;
truncate_inode_pages_range(mapping, start, end);
err = truncate_bdev_range(bdev, mode, start, end);
if (err)
return err;

return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
BLKDEV_ZERO_NOUNMAP);
Expand Down
37 changes: 33 additions & 4 deletions fs/block_dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,35 @@ void invalidate_bdev(struct block_device *bdev)
}
EXPORT_SYMBOL(invalidate_bdev);

/*
 * truncate_bdev_range - drop buffers and page cache for a byte range of @bdev
 * @bdev:   block device whose cache is truncated
 * @mode:   mode the caller holds the device open with
 * @lstart: first byte of the range
 * @lend:   last byte of the range (callers pass start + len - 1)
 *
 * A caller that already holds the device with FMODE_EXCL truncates directly.
 * Any other caller temporarily claims the device for the duration of the
 * truncate, so that the cache is not dropped underneath another exclusive
 * owner (such as a filesystem).
 *
 * Returns 0 on success, or the error from bd_prepare_to_claim() when the
 * temporary claim cannot be taken.
 */
int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
			loff_t lstart, loff_t lend)
{
	struct block_device *whole;
	int ret;

	/* Exclusive handle already held: no claim needed, just truncate. */
	if (mode & FMODE_EXCL) {
		truncate_inode_pages_range(bdev->bd_inode->i_mapping,
					   lstart, lend);
		return 0;
	}

	/*
	 * Non-exclusive handle: upgrade to an exclusive claim while the
	 * buffer cache is discarded, to avoid discarding buffers under a
	 * live filesystem.
	 */
	whole = bdev->bd_contains;
	ret = bd_prepare_to_claim(bdev, whole, truncate_bdev_range);
	if (ret)
		return ret;

	truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);

	/* Give back the temporary claim taken above. */
	if (whole)
		bd_abort_claiming(bdev, whole, truncate_bdev_range);

	return 0;
}
EXPORT_SYMBOL(truncate_bdev_range);

static void set_init_blocksize(struct block_device *bdev)
{
bdev->bd_inode->i_blkbits = blksize_bits(bdev_logical_block_size(bdev));
Expand Down Expand Up @@ -1968,7 +1997,6 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
loff_t len)
{
struct block_device *bdev = I_BDEV(bdev_file_inode(file));
struct address_space *mapping;
loff_t end = start + len - 1;
loff_t isize;
int error;
Expand Down Expand Up @@ -1996,8 +2024,9 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
return -EINVAL;

/* Invalidate the page cache, including dirty pages. */
mapping = bdev->bd_inode->i_mapping;
truncate_inode_pages_range(mapping, start, end);
error = truncate_bdev_range(bdev, file->f_mode, start, end);
if (error)
return error;

switch (mode) {
case FALLOC_FL_ZERO_RANGE:
Expand All @@ -2024,7 +2053,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
* the caller will be given -EBUSY. The third argument is
* inclusive, so the rounding here is safe.
*/
return invalidate_inode_pages2_range(mapping,
return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping,
start >> PAGE_SHIFT,
end >> PAGE_SHIFT);
}
Expand Down
7 changes: 7 additions & 0 deletions include/linux/blkdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -1987,11 +1987,18 @@ void bdput(struct block_device *);

#ifdef CONFIG_BLOCK
void invalidate_bdev(struct block_device *bdev);
int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart,
loff_t lend);
int sync_blockdev(struct block_device *bdev);
#else
/* No-op stub used when CONFIG_BLOCK is not defined. */
static inline void invalidate_bdev(struct block_device *bdev)
{
}
/*
 * Stub used when CONFIG_BLOCK is not defined: performs no work and
 * always returns 0.
 */
static inline int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
loff_t lstart, loff_t lend)
{
return 0;
}
static inline int sync_blockdev(struct block_device *bdev)
{
return 0;
Expand Down

0 comments on commit 384d87e

Please sign in to comment.