Skip to content

Commit

Permalink
dax: don't abuse get_block mapping for endio callbacks
Browse files Browse the repository at this point in the history
dax_fault() currently relies on the get_block callback to attach an
io completion callback to the mapping buffer head so that it can
run unwritten extent conversion after zeroing allocated blocks.

Instead of this hack, pass the conversion callback directly into
dax_fault() similar to the get_block callback. When the filesystem
allocates unwritten extents, it will set the buffer_unwritten()
flag, and hence the dax_fault code can call the completion function
in the contexts where it is necessary without overloading the
mapping buffer head.

Note: The changes to ext4 to use this interface are suspect at best.
In fact, the way ext4 did this end_io assignment in the first place
looks suspect because it only set a completion callback when there
wasn't already some other write() call taking place on the same
inode. The ext4 end_io code looks rather intricate and fragile with
all it's reference counting and passing to different contexts for
modification via inode private pointers that aren't protected by
locks...

Signed-off-by: Dave Chinner <[email protected]>
Acked-by: Jan Kara <[email protected]>
Signed-off-by: Dave Chinner <[email protected]>
  • Loading branch information
Dave Chinner authored and dchinner committed Jun 3, 2015
1 parent ec56b1f commit e842f29
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 26 deletions.
21 changes: 15 additions & 6 deletions fs/dax.c
Original file line number Diff line number Diff line change
Expand Up @@ -309,14 +309,11 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
out:
i_mmap_unlock_read(mapping);

if (bh->b_end_io)
bh->b_end_io(bh, 1);

return error;
}

static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block)
get_block_t get_block, dax_iodone_t complete_unwritten)
{
struct file *file = vma->vm_file;
struct address_space *mapping = file->f_mapping;
Expand Down Expand Up @@ -417,7 +414,19 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
page_cache_release(page);
}

/*
* If we successfully insert the new mapping over an unwritten extent,
* we need to ensure we convert the unwritten extent. If there is an
* error inserting the mapping, the filesystem needs to leave it as
* unwritten to prevent exposure of the stale underlying data to
* userspace, but we still need to call the completion function so
* the private resources on the mapping buffer can be released. We
* indicate what the callback should do via the uptodate variable, same
* as for normal BH based IO completions.
*/
error = dax_insert_mapping(inode, &bh, vma, vmf);
if (buffer_unwritten(&bh))
complete_unwritten(&bh, !error);

out:
if (error == -ENOMEM)
Expand Down Expand Up @@ -445,7 +454,7 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
* fault handler for DAX files.
*/
int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block)
get_block_t get_block, dax_iodone_t complete_unwritten)
{
int result;
struct super_block *sb = file_inode(vma->vm_file)->i_sb;
Expand All @@ -454,7 +463,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
}
result = do_dax_fault(vma, vmf, get_block);
result = do_dax_fault(vma, vmf, get_block, complete_unwritten);
if (vmf->flags & FAULT_FLAG_WRITE)
sb_end_pagefault(sb);

Expand Down
4 changes: 2 additions & 2 deletions fs/ext2/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@
#ifdef CONFIG_FS_DAX
static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
return dax_fault(vma, vmf, ext2_get_block);
return dax_fault(vma, vmf, ext2_get_block, NULL);
}

static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
return dax_mkwrite(vma, vmf, ext2_get_block);
return dax_mkwrite(vma, vmf, ext2_get_block, NULL);
}

static const struct vm_operations_struct ext2_dax_vm_ops = {
Expand Down
16 changes: 14 additions & 2 deletions fs/ext4/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,15 +192,27 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
}

#ifdef CONFIG_FS_DAX
static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
{
struct inode *inode = bh->b_assoc_map->host;
/* XXX: breaks on 32-bit > 16GB. Is that even supported? */
loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
int err;
if (!uptodate)
return;
WARN_ON(!buffer_unwritten(bh));
err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
}

static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
return dax_fault(vma, vmf, ext4_get_block);
return dax_fault(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
/* Is this the right get_block? */
}

static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
return dax_mkwrite(vma, vmf, ext4_get_block);
return dax_mkwrite(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
}

static const struct vm_operations_struct ext4_dax_vm_ops = {
Expand Down
21 changes: 7 additions & 14 deletions fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -656,18 +656,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
return retval;
}

static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
{
struct inode *inode = bh->b_assoc_map->host;
/* XXX: breaks on 32-bit > 16GB. Is that even supported? */
loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
int err;
if (!uptodate)
return;
WARN_ON(!buffer_unwritten(bh));
err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
}

/* Maximum number of blocks we map for direct IO at once. */
#define DIO_MAX_BLOCKS 4096

Expand Down Expand Up @@ -705,10 +693,15 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,

map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) {
if (IS_DAX(inode) && buffer_unwritten(bh)) {
/*
* dgc: I suspect unwritten conversion on ext4+DAX is
* fundamentally broken here when there are concurrent
* read/write in progress on this inode.
*/
WARN_ON_ONCE(io_end);
bh->b_assoc_map = inode->i_mapping;
bh->b_private = (void *)(unsigned long)iblock;
bh->b_end_io = ext4_end_io_unwritten;
}
if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
set_buffer_defer_completion(bh);
Expand Down
6 changes: 4 additions & 2 deletions include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
ssize_t bytes, void *private);
typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);

#define MAY_EXEC 0x00000001
#define MAY_WRITE 0x00000002
Expand Down Expand Up @@ -2627,9 +2628,10 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
int dax_clear_blocks(struct inode *, sector_t block, long size);
int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
int dax_truncate_page(struct inode *, loff_t from, get_block_t);
int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
dax_iodone_t);
int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb)
#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod)

#ifdef CONFIG_BLOCK
typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
Expand Down

0 comments on commit e842f29

Please sign in to comment.