Skip to content

Commit

Permalink
dax: remove the copy_from_iter and copy_to_iter methods
Browse files Browse the repository at this point in the history
These methods indirect the actual DAX read/write path.  In the end pmem
uses magic flush and mc safe variants and fuse and dcssblk use plain ones
while device mapper picks redirects to the underlying device.

Add set_dax_nocache() and set_dax_nomc() APIs to control which copy
routines are used to remove indirect call from the read/write fast path
as well as a lot of boilerplate code.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Vivek Goyal <[email protected]> [virtiofs]
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Dan Williams <[email protected]>
  • Loading branch information
Christoph Hellwig authored and djbw committed Dec 18, 2021
1 parent 30c6828 commit 7ac5360
Show file tree
Hide file tree
Showing 12 changed files with 41 additions and 237 deletions.
2 changes: 2 additions & 0 deletions drivers/dax/bus.c
Original file line number Diff line number Diff line change
Expand Up @@ -1330,6 +1330,8 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
goto err_alloc_dax;
}
set_dax_synchronous(dax_dev);
set_dax_nocache(dax_dev);
set_dax_nomc(dax_dev);

/* a device_dax instance is dead while the driver is not attached */
kill_dax(dax_dev);
Expand Down
36 changes: 32 additions & 4 deletions drivers/dax/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ enum dax_device_flags {
DAXDEV_WRITE_CACHE,
/* flag to check if device supports synchronous flush */
DAXDEV_SYNC,
/* do not leave the caches dirty after writes */
DAXDEV_NOCACHE,
/* handle CPU fetch exceptions during reads */
DAXDEV_NOMC,
};

/**
Expand Down Expand Up @@ -146,19 +150,31 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
if (!dax_alive(dax_dev))
return 0;

return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i);
/*
* The userspace address for the memory copy has already been validated
* via access_ok() in vfs_write, so use the 'no check' version to bypass
* the HARDENED_USERCOPY overhead.
*/
if (test_bit(DAXDEV_NOCACHE, &dax_dev->flags))
return _copy_from_iter_flushcache(addr, bytes, i);
return _copy_from_iter(addr, bytes, i);
}
EXPORT_SYMBOL_GPL(dax_copy_from_iter);

size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i)
{
if (!dax_alive(dax_dev))
return 0;

return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i);
/*
* The userspace address for the memory copy has already been validated
* via access_ok() in vfs_red, so use the 'no check' version to bypass
* the HARDENED_USERCOPY overhead.
*/
if (test_bit(DAXDEV_NOMC, &dax_dev->flags))
return _copy_mc_to_iter(addr, bytes, i);
return _copy_to_iter(addr, bytes, i);
}
EXPORT_SYMBOL_GPL(dax_copy_to_iter);

int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages)
Expand Down Expand Up @@ -220,6 +236,18 @@ void set_dax_synchronous(struct dax_device *dax_dev)
}
EXPORT_SYMBOL_GPL(set_dax_synchronous);

void set_dax_nocache(struct dax_device *dax_dev)
{
set_bit(DAXDEV_NOCACHE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(set_dax_nocache);

void set_dax_nomc(struct dax_device *dax_dev)
{
set_bit(DAXDEV_NOMC, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(set_dax_nomc);

bool dax_alive(struct dax_device *dax_dev)
{
lockdep_assert_held(&dax_srcu);
Expand Down
20 changes: 0 additions & 20 deletions drivers/md/dm-linear.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,22 +180,6 @@ static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
}

static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);

return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}

static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);

return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}

static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages)
{
Expand All @@ -206,8 +190,6 @@ static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,

#else
#define linear_dax_direct_access NULL
#define linear_dax_copy_from_iter NULL
#define linear_dax_copy_to_iter NULL
#define linear_dax_zero_page_range NULL
#endif

Expand All @@ -225,8 +207,6 @@ static struct target_type linear_target = {
.prepare_ioctl = linear_prepare_ioctl,
.iterate_devices = linear_iterate_devices,
.direct_access = linear_dax_direct_access,
.dax_copy_from_iter = linear_dax_copy_from_iter,
.dax_copy_to_iter = linear_dax_copy_to_iter,
.dax_zero_page_range = linear_dax_zero_page_range,
};

Expand Down
80 changes: 0 additions & 80 deletions drivers/md/dm-log-writes.c
Original file line number Diff line number Diff line change
Expand Up @@ -902,51 +902,6 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
}

#if IS_ENABLED(CONFIG_FS_DAX)
static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
struct iov_iter *i)
{
struct pending_block *block;

if (!bytes)
return 0;

block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
if (!block) {
DMERR("Error allocating dax pending block");
return -ENOMEM;
}

block->data = kzalloc(bytes, GFP_KERNEL);
if (!block->data) {
DMERR("Error allocating dax data space");
kfree(block);
return -ENOMEM;
}

/* write data provided via the iterator */
if (!copy_from_iter(block->data, bytes, i)) {
DMERR("Error copying dax data");
kfree(block->data);
kfree(block);
return -EIO;
}

/* rewind the iterator so that the block driver can use it */
iov_iter_revert(i, bytes);

block->datalen = bytes;
block->sector = bio_to_dev_sectors(lc, sector);
block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;

atomic_inc(&lc->pending_blocks);
spin_lock_irq(&lc->blocks_lock);
list_add_tail(&block->list, &lc->unflushed_blocks);
spin_unlock_irq(&lc->blocks_lock);
wake_up_process(lc->log_kthread);

return 0;
}

static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti,
pgoff_t *pgoff)
{
Expand All @@ -964,37 +919,6 @@ static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
}

static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
pgoff_t pgoff, void *addr, size_t bytes,
struct iov_iter *i)
{
struct log_writes_c *lc = ti->private;
sector_t sector = pgoff * PAGE_SECTORS;
struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
int err;

/* Don't bother doing anything if logging has been disabled */
if (!lc->logging_enabled)
goto dax_copy;

err = log_dax(lc, sector, bytes, i);
if (err) {
DMWARN("Error %d logging DAX write", err);
return 0;
}
dax_copy:
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}

static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
pgoff_t pgoff, void *addr, size_t bytes,
struct iov_iter *i)
{
struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);

return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}

static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages)
{
Expand All @@ -1005,8 +929,6 @@ static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,

#else
#define log_writes_dax_direct_access NULL
#define log_writes_dax_copy_from_iter NULL
#define log_writes_dax_copy_to_iter NULL
#define log_writes_dax_zero_page_range NULL
#endif

Expand All @@ -1024,8 +946,6 @@ static struct target_type log_writes_target = {
.iterate_devices = log_writes_iterate_devices,
.io_hints = log_writes_io_hints,
.direct_access = log_writes_dax_direct_access,
.dax_copy_from_iter = log_writes_dax_copy_from_iter,
.dax_copy_to_iter = log_writes_dax_copy_to_iter,
.dax_zero_page_range = log_writes_dax_zero_page_range,
};

Expand Down
20 changes: 0 additions & 20 deletions drivers/md/dm-stripe.c
Original file line number Diff line number Diff line change
Expand Up @@ -324,22 +324,6 @@ static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
}

static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);

return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}

static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);

return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}

static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages)
{
Expand All @@ -350,8 +334,6 @@ static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,

#else
#define stripe_dax_direct_access NULL
#define stripe_dax_copy_from_iter NULL
#define stripe_dax_copy_to_iter NULL
#define stripe_dax_zero_page_range NULL
#endif

Expand Down Expand Up @@ -488,8 +470,6 @@ static struct target_type stripe_target = {
.iterate_devices = stripe_iterate_devices,
.io_hints = stripe_io_hints,
.direct_access = stripe_dax_direct_access,
.dax_copy_from_iter = stripe_dax_copy_from_iter,
.dax_copy_to_iter = stripe_dax_copy_to_iter,
.dax_zero_page_range = stripe_dax_zero_page_range,
};

Expand Down
52 changes: 2 additions & 50 deletions drivers/md/dm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1027,54 +1027,6 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
return ret;
}

static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct mapped_device *md = dax_get_private(dax_dev);
sector_t sector = pgoff * PAGE_SECTORS;
struct dm_target *ti;
long ret = 0;
int srcu_idx;

ti = dm_dax_get_live_target(md, sector, &srcu_idx);

if (!ti)
goto out;
if (!ti->type->dax_copy_from_iter) {
ret = copy_from_iter(addr, bytes, i);
goto out;
}
ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i);
out:
dm_put_live_table(md, srcu_idx);

return ret;
}

static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct mapped_device *md = dax_get_private(dax_dev);
sector_t sector = pgoff * PAGE_SECTORS;
struct dm_target *ti;
long ret = 0;
int srcu_idx;

ti = dm_dax_get_live_target(md, sector, &srcu_idx);

if (!ti)
goto out;
if (!ti->type->dax_copy_to_iter) {
ret = copy_to_iter(addr, bytes, i);
goto out;
}
ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i);
out:
dm_put_live_table(md, srcu_idx);

return ret;
}

static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages)
{
Expand Down Expand Up @@ -1770,6 +1722,8 @@ static struct mapped_device *alloc_dev(int minor)
md->dax_dev = NULL;
goto bad;
}
set_dax_nocache(md->dax_dev);
set_dax_nomc(md->dax_dev);
if (dax_add_host(md->dax_dev, md->disk))
goto bad;
}
Expand Down Expand Up @@ -3024,8 +2978,6 @@ static const struct block_device_operations dm_rq_blk_dops = {

static const struct dax_operations dm_dax_ops = {
.direct_access = dm_dax_direct_access,
.copy_from_iter = dm_dax_copy_from_iter,
.copy_to_iter = dm_dax_copy_to_iter,
.zero_page_range = dm_dax_zero_page_range,
};

Expand Down
20 changes: 2 additions & 18 deletions drivers/nvdimm/pmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,26 +301,8 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
}

/*
* Bounds checking, both file offset and device offset, is handled by
* dax_iomap_actor()
*/
static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
return _copy_from_iter_flushcache(addr, bytes, i);
}

static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
return _copy_mc_to_iter(addr, bytes, i);
}

static const struct dax_operations pmem_dax_ops = {
.direct_access = pmem_dax_direct_access,
.copy_from_iter = pmem_copy_from_iter,
.copy_to_iter = pmem_copy_to_iter,
.zero_page_range = pmem_dax_zero_page_range,
};

Expand Down Expand Up @@ -497,6 +479,8 @@ static int pmem_attach_disk(struct device *dev,
rc = PTR_ERR(dax_dev);
goto out;
}
set_dax_nocache(dax_dev);
set_dax_nomc(dax_dev);
if (is_nvdimm_sync(nd_region))
set_dax_synchronous(dax_dev);
rc = dax_add_host(dax_dev, disk);
Expand Down
Loading

0 comments on commit 7ac5360

Please sign in to comment.