Skip to content

Commit

Permalink
block: use DAX for partition table reads
Browse files Browse the repository at this point in the history
Avoid populating pagecache when the block device is in DAX mode.
Otherwise these page cache entries collide with the fsync/msync
implementation and break data durability guarantees.

Cc: Jan Kara <[email protected]>
Cc: Jeff Moyer <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Dave Chinner <[email protected]>
Cc: Andrew Morton <[email protected]>
Reported-by: Ross Zwisler <[email protected]>
Tested-by: Ross Zwisler <[email protected]>
Reviewed-by: Matthew Wilcox <[email protected]>
Signed-off-by: Dan Williams <[email protected]>
  • Loading branch information
djbw committed Jan 30, 2016
1 parent 9f4736f commit d1a5f2b
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 3 deletions.
18 changes: 15 additions & 3 deletions block/partition-generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <linux/kmod.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
#include <linux/dax.h>
#include <linux/blktrace_api.h>

#include "partitions/check.h"
Expand Down Expand Up @@ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
return 0;
}

unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;

return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
NULL);
}

unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
{
struct page *page;

page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
NULL);
/* don't populate page cache for dax capable devices */
if (IS_DAX(bdev->bd_inode))
page = read_dax_sector(bdev, n);
else
page = read_pagecache_sector(bdev, n);

if (!IS_ERR(page)) {
if (PageError(page))
goto fail;
Expand Down
20 changes: 20 additions & 0 deletions fs/dax.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev,
blk_queue_exit(bdev->bd_queue);
}

struct page *read_dax_sector(struct block_device *bdev, sector_t n)
{
struct page *page = alloc_pages(GFP_KERNEL, 0);
struct blk_dax_ctl dax = {
.size = PAGE_SIZE,
.sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
};
long rc;

if (!page)
return ERR_PTR(-ENOMEM);

rc = dax_map_atomic(bdev, &dax);
if (rc < 0)
return ERR_PTR(rc);
memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
dax_unmap_atomic(bdev, &dax);
return page;
}

/*
* dax_clear_blocks() is called from within transaction context from XFS,
* and hence this means the stack from this point must follow GFP_NOFS
Expand Down
11 changes: 11 additions & 0 deletions include/linux/dax.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
dax_iodone_t);
int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
dax_iodone_t);

#ifdef CONFIG_FS_DAX
struct page *read_dax_sector(struct block_device *bdev, sector_t n);
#else
static inline struct page *read_dax_sector(struct block_device *bdev,
sector_t n)
{
return ERR_PTR(-ENXIO);
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
unsigned int flags, get_block_t, dax_iodone_t);
Expand Down

0 comments on commit d1a5f2b

Please sign in to comment.