
Merge tag 'libnvdimm-for-5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:
 "Primarily just the virtio_pmem driver:

   - virtio_pmem

     The new virtio_pmem facility introduces a paravirtualized
     persistent memory device that allows a guest VM to use DAX
     mechanisms to access a host file through the host page cache. It
     arranges for MAP_SYNC to be disabled and instead triggers a host
     fsync() when a 'write-cache flush' command is sent to the virtual
     disk device.

   - Miscellaneous small fixups"

* tag 'libnvdimm-for-5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  virtio_pmem: fix sparse warning
  xfs: disable map_sync for async flush
  ext4: disable map_sync for async flush
  dax: check synchronous mapping is supported
  dm: enable synchronous dax
  libnvdimm: add dax_dev sync flag
  virtio-pmem: Add virtio pmem driver
  libnvdimm: nd_region flush callback support
  libnvdimm, namespace: Drop uuid_t implementation detail
torvalds committed Jul 18, 2019
2 parents d77e9e4 + 8c2e408 commit f8c3500
Showing 23 changed files with 508 additions and 38 deletions.
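
For context on the MAP_SYNC gating described in the pull message: a filesystem decides whether to honor a VM_SYNC mapping by asking the DAX core if the backing device flushes synchronously. Below is a minimal sketch of that check, modeled on the daxdev_mapping_supported() helper this series adds; the surrounding ->mmap handler and its dax_device lookup are hypothetical stand-ins for a real filesystem.

	/* Hypothetical ->mmap handler showing the MAP_SYNC gate */
	static int example_fs_mmap(struct file *file, struct vm_area_struct *vma)
	{
		struct dax_device *dax_dev = example_get_dax_dev(file); /* hypothetical */

		/* VM_SYNC is only allowed when the device needs no async host flush */
		if (!daxdev_mapping_supported(vma, dax_dev))
			return -EOPNOTSUPP;

		file_accessed(file);
		vma->vm_ops = &example_fs_vm_ops; /* hypothetical */
		return 0;
	}

This mirrors how the xfs and ext4 patches in the shortlog disable MAP_SYNC when the region needs an asynchronous (virtio) flush.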
4 changes: 2 additions & 2 deletions drivers/acpi/nfit/core.c
@@ -2426,7 +2426,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
 		offset = to_interleave_offset(offset, mmio);

 	writeq(cmd, mmio->addr.base + offset);
-	nvdimm_flush(nfit_blk->nd_region);
+	nvdimm_flush(nfit_blk->nd_region, NULL);

 	if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
 		readq(mmio->addr.base + offset);
@@ -2475,7 +2475,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
 	}

 	if (rw)
-		nvdimm_flush(nfit_blk->nd_region);
+		nvdimm_flush(nfit_blk->nd_region, NULL);

 	rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
 	return rc;
2 changes: 1 addition & 1 deletion drivers/dax/bus.c
@@ -388,7 +388,7 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
 	 * No 'host' or dax_operations since there is no access to this
 	 * device outside of mmap of the resulting character device.
	 */
-	dax_dev = alloc_dax(dev_dax, NULL, NULL);
+	dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
 	if (!dax_dev)
 		goto err;

19 changes: 18 additions & 1 deletion drivers/dax/super.c
@@ -195,6 +195,8 @@ enum dax_device_flags {
 	DAXDEV_ALIVE,
 	/* gate whether dax_flush() calls the low level flush routine */
 	DAXDEV_WRITE_CACHE,
+	/* flag to check if device supports synchronous flush */
+	DAXDEV_SYNC,
 };

 /**
@@ -372,6 +374,18 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev)
 }
 EXPORT_SYMBOL_GPL(dax_write_cache_enabled);

+bool __dax_synchronous(struct dax_device *dax_dev)
+{
+	return test_bit(DAXDEV_SYNC, &dax_dev->flags);
+}
+EXPORT_SYMBOL_GPL(__dax_synchronous);
+
+void __set_dax_synchronous(struct dax_device *dax_dev)
+{
+	set_bit(DAXDEV_SYNC, &dax_dev->flags);
+}
+EXPORT_SYMBOL_GPL(__set_dax_synchronous);
+
 bool dax_alive(struct dax_device *dax_dev)
 {
 	lockdep_assert_held(&dax_srcu);
@@ -526,7 +540,7 @@ static void dax_add_host(struct dax_device *dax_dev, const char *host)
 }

 struct dax_device *alloc_dax(void *private, const char *__host,
-		const struct dax_operations *ops)
+		const struct dax_operations *ops, unsigned long flags)
 {
 	struct dax_device *dax_dev;
 	const char *host;
@@ -549,6 +563,9 @@ struct dax_device *alloc_dax(void *private, const char *__host,
 	dax_add_host(dax_dev, host);
 	dax_dev->ops = ops;
 	dax_dev->private = private;
+	if (flags & DAXDEV_F_SYNC)
+		set_dax_synchronous(dax_dev);
+
 	return dax_dev;

 err_dev:
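
The new flags argument is how a driver opts its dax_device into synchronous DAX at allocation time. As a usage sketch, patterned on the pmem driver changes in this same series (surrounding variables abbreviated; treat the details as illustrative rather than authoritative):

	/* Illustrative: a pmem-style driver advertising synchronous DAX
	 * when its region does not require an asynchronous host flush. */
	unsigned long flags = 0;

	if (is_nvdimm_sync(nd_region))	/* helper added by this series */
		flags = DAXDEV_F_SYNC;
	dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);

Device-mapper, by contrast, passes 0 here and upgrades the device later via set_dax_synchronous() once the whole table is known to be synchronous (see the dm-table.c hunks below).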
24 changes: 18 additions & 6 deletions drivers/md/dm-table.c
@@ -881,7 +881,7 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
 EXPORT_SYMBOL_GPL(dm_table_set_type);

 /* validate the dax capability of the target device span */
-static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
+int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
 			sector_t start, sector_t len, void *data)
 {
 	int blocksize = *(int *) data;
@@ -890,7 +890,15 @@ static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
 			start, len);
 }

-bool dm_table_supports_dax(struct dm_table *t, int blocksize)
+/* Check devices support synchronous DAX */
+static int device_synchronous(struct dm_target *ti, struct dm_dev *dev,
+			sector_t start, sector_t len, void *data)
+{
+	return dax_synchronous(dev->dax_dev);
+}
+
+bool dm_table_supports_dax(struct dm_table *t,
+			iterate_devices_callout_fn iterate_fn, int *blocksize)
 {
 	struct dm_target *ti;
 	unsigned i;
@@ -903,8 +911,7 @@ bool dm_table_supports_dax(struct dm_table *t,
 			return false;

 		if (!ti->type->iterate_devices ||
-		    !ti->type->iterate_devices(ti, device_supports_dax,
-			    &blocksize))
+		    !ti->type->iterate_devices(ti, iterate_fn, blocksize))
 			return false;
 	}

@@ -940,6 +947,7 @@ static int dm_table_determine_type(struct dm_table *t)
 	struct dm_target *tgt;
 	struct list_head *devices = dm_table_get_devices(t);
 	enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
+	int page_size = PAGE_SIZE;

 	if (t->type != DM_TYPE_NONE) {
 		/* target already set the table's type */
@@ -984,7 +992,7 @@ static int dm_table_determine_type(struct dm_table *t)
 verify_bio_based:
 	/* We must use this table as bio-based */
 	t->type = DM_TYPE_BIO_BASED;
-	if (dm_table_supports_dax(t, PAGE_SIZE) ||
+	if (dm_table_supports_dax(t, device_supports_dax, &page_size) ||
 	    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
 		t->type = DM_TYPE_DAX_BIO_BASED;
 	} else {
@@ -1883,6 +1891,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			       struct queue_limits *limits)
 {
 	bool wc = false, fua = false;
+	int page_size = PAGE_SIZE;

 	/*
 	 * Copy table's limits to the DM device's request_queue
@@ -1910,8 +1919,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	}
 	blk_queue_write_cache(q, wc, fua);

-	if (dm_table_supports_dax(t, PAGE_SIZE))
+	if (dm_table_supports_dax(t, device_supports_dax, &page_size)) {
 		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
+		if (dm_table_supports_dax(t, device_synchronous, NULL))
+			set_dax_synchronous(t->md->dax_dev);
+	}
 	else
 		blk_queue_flag_clear(QUEUE_FLAG_DAX, q);

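
Worth noting: set_dax_synchronous() in the hunk above is the static-inline wrapper that include/linux/dax.h (also touched by this merge, not rendered on this page) layers over the __set_dax_synchronous() export shown in drivers/dax/super.c earlier. Quoted from memory, so treat the exact form as approximate:

	/* CONFIG_DAX_DRIVER flavor of the wrappers, per this series */
	static inline bool dax_synchronous(struct dax_device *dax_dev)
	{
		return __dax_synchronous(dax_dev);
	}

	static inline void set_dax_synchronous(struct dax_device *dax_dev)
	{
		__set_dax_synchronous(dax_dev);
	}

The stacking rule encoded above is strict: the mapped device is marked synchronous only when every component device passes device_synchronous().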
5 changes: 3 additions & 2 deletions drivers/md/dm.c
@@ -1117,7 +1117,7 @@ static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bd
 	if (!map)
 		return false;

-	ret = dm_table_supports_dax(map, blocksize);
+	ret = dm_table_supports_dax(map, device_supports_dax, &blocksize);

 	dm_put_live_table(md, srcu_idx);

@@ -1989,7 +1989,8 @@ static struct mapped_device *alloc_dev(int minor)
 	sprintf(md->disk->disk_name, "dm-%d", minor);

 	if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
-		md->dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+		md->dax_dev = alloc_dax(md, md->disk->disk_name,
+					&dm_dax_ops, 0);
 		if (!md->dax_dev)
 			goto bad;
 	}
5 changes: 4 additions & 1 deletion drivers/md/dm.h
@@ -72,7 +72,10 @@ bool dm_table_bio_based(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
-bool dm_table_supports_dax(struct dm_table *t, int blocksize);
+bool dm_table_supports_dax(struct dm_table *t, iterate_devices_callout_fn fn,
+			   int *blocksize);
+int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
+			sector_t start, sector_t len, void *data);

 void dm_lock_md_type(struct mapped_device *md);
 void dm_unlock_md_type(struct mapped_device *md);
1 change: 1 addition & 0 deletions drivers/nvdimm/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o
 obj-$(CONFIG_ND_BLK) += nd_blk.o
 obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
 obj-$(CONFIG_OF_PMEM) += of_pmem.o
+obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o

 nd_pmem-y := pmem.o

6 changes: 4 additions & 2 deletions drivers/nvdimm/claim.c
@@ -255,7 +255,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
 	unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
 	sector_t sector = offset >> 9;
-	int rc = 0;
+	int rc = 0, ret = 0;

 	if (unlikely(!size))
 		return 0;
@@ -293,7 +293,9 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 	}

 	memcpy_flushcache(nsio->addr + offset, buf, size);
-	nvdimm_flush(to_nd_region(ndns->dev.parent));
+	ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
+	if (ret)
+		rc = ret;

 	return rc;
 }
8 changes: 4 additions & 4 deletions drivers/nvdimm/namespace_devs.c
@@ -1822,8 +1822,8 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
 		    && !guid_equal(&nd_set->type_guid,
 				&nd_label->type_guid)) {
 			dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
-					nd_set->type_guid.b,
-					nd_label->type_guid.b);
+					&nd_set->type_guid,
+					&nd_label->type_guid);
 			continue;
 		}

@@ -2227,8 +2227,8 @@ static struct device *create_namespace_blk(struct nd_region *nd_region,
 	if (namespace_label_has(ndd, type_guid)) {
 		if (!guid_equal(&nd_set->type_guid, &nd_label->type_guid)) {
 			dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
-				nd_set->type_guid.b,
-				nd_label->type_guid.b);
+				&nd_set->type_guid,
+				&nd_label->type_guid);
 			return ERR_PTR(-EAGAIN);
 		}

1 change: 1 addition & 0 deletions drivers/nvdimm/nd.h
@@ -155,6 +155,7 @@ struct nd_region {
 	struct badblocks bb;
 	struct nd_interleave_set *nd_set;
 	struct nd_percpu_lane __percpu *lane;
+	int (*flush)(struct nd_region *nd_region, struct bio *bio);
 	struct nd_mapping mapping[0];
 };

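
With this callback in place, nvdimm_flush() becomes a dispatcher: regions without a provider flush keep the legacy behavior (renamed generic_nvdimm_flush()), while virtio regions route to async_pmem_flush() in the new file below. A hedged sketch of that dispatch; the authoritative version lives in drivers/nvdimm/region_devs.c, which this page does not show:

	/* Approximate shape of the reworked entry point */
	int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
	{
		int rc = 0;

		if (!nd_region->flush)
			rc = generic_nvdimm_flush(nd_region);
		else {
			/* provider callback, e.g. async_pmem_flush() */
			if (nd_region->flush(nd_region, bio))
				rc = -EIO;
		}

		return rc;
	}

The int return is what lets callers such as nsio_rw_bytes() above propagate flush errors instead of silently dropping them.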
125 changes: 125 additions & 0 deletions drivers/nvdimm/nd_virtio.c
@@ -0,0 +1,125 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * virtio_pmem.c: Virtio pmem Driver
 *
 * Discovers persistent memory range information
 * from host and provides a virtio based flushing
 * interface.
 */
#include "virtio_pmem.h"
#include "nd.h"

/* The interrupt handler */
void virtio_pmem_host_ack(struct virtqueue *vq)
{
	struct virtio_pmem *vpmem = vq->vdev->priv;
	struct virtio_pmem_request *req_data, *req_buf;
	unsigned long flags;
	unsigned int len;

	spin_lock_irqsave(&vpmem->pmem_lock, flags);
	while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) {
		req_data->done = true;
		wake_up(&req_data->host_acked);

		if (!list_empty(&vpmem->req_list)) {
			req_buf = list_first_entry(&vpmem->req_list,
					struct virtio_pmem_request, list);
			req_buf->wq_buf_avail = true;
			wake_up(&req_buf->wq_buf);
			list_del(&req_buf->list);
		}
	}
	spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
}
EXPORT_SYMBOL_GPL(virtio_pmem_host_ack);

/* The request submission function */
static int virtio_pmem_flush(struct nd_region *nd_region)
{
	struct virtio_device *vdev = nd_region->provider_data;
	struct virtio_pmem *vpmem = vdev->priv;
	struct virtio_pmem_request *req_data;
	struct scatterlist *sgs[2], sg, ret;
	unsigned long flags;
	int err, err1;

	might_sleep();
	req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
	if (!req_data)
		return -ENOMEM;

	req_data->done = false;
	init_waitqueue_head(&req_data->host_acked);
	init_waitqueue_head(&req_data->wq_buf);
	INIT_LIST_HEAD(&req_data->list);
	req_data->req.type = cpu_to_le32(VIRTIO_PMEM_REQ_TYPE_FLUSH);
	sg_init_one(&sg, &req_data->req, sizeof(req_data->req));
	sgs[0] = &sg;
	sg_init_one(&ret, &req_data->resp.ret, sizeof(req_data->resp));
	sgs[1] = &ret;

	spin_lock_irqsave(&vpmem->pmem_lock, flags);
	/*
	 * If virtqueue_add_sgs returns -ENOSPC then the req_vq virtual
	 * queue does not have a free descriptor. We add the request
	 * to req_list and wait for host_ack to wake us up when free
	 * slots are available.
	 */
	while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req_data,
					GFP_ATOMIC)) == -ENOSPC) {

		dev_info(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue\n");
		req_data->wq_buf_avail = false;
		list_add_tail(&req_data->list, &vpmem->req_list);
		spin_unlock_irqrestore(&vpmem->pmem_lock, flags);

		/* A host response results in "host_ack" getting called */
		wait_event(req_data->wq_buf, req_data->wq_buf_avail);
		spin_lock_irqsave(&vpmem->pmem_lock, flags);
	}
	err1 = virtqueue_kick(vpmem->req_vq);
	spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
	/*
	 * virtqueue_add_sgs failed with an error other than -ENOSPC; we
	 * can't do anything about that.
	 */
	if (err || !err1) {
		dev_info(&vdev->dev, "failed to send command to virtio pmem device\n");
		err = -EIO;
	} else {
		/* A host response results in "host_ack" getting called */
		wait_event(req_data->host_acked, req_data->done);
		err = le32_to_cpu(req_data->resp.ret);
	}

	kfree(req_data);
	return err;
}

/* The asynchronous flush callback function */
int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
{
	/*
	 * Create a child bio for the asynchronous flush and chain it to
	 * the parent bio. Otherwise, call the nd_region flush directly.
	 */
	if (bio && bio->bi_iter.bi_sector != -1) {
		struct bio *child = bio_alloc(GFP_ATOMIC, 0);

		if (!child)
			return -ENOMEM;
		bio_copy_dev(child, bio);
		child->bi_opf = REQ_PREFLUSH;
		child->bi_iter.bi_sector = -1;
		bio_chain(child, bio);
		submit_bio(child);
		return 0;
	}
	if (virtio_pmem_flush(nd_region))
		return -EIO;

	return 0;
}
EXPORT_SYMBOL_GPL(async_pmem_flush);
MODULE_LICENSE("GPL");
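
For completeness, the probe path in drivers/nvdimm/virtio_pmem.c (changed in this merge but not rendered on this page) is what wires async_pmem_flush() in as the region callback. A hedged sketch of the relevant lines, with descriptor setup and error handling omitted; treat the exact field assignments as approximate:

	/* Abridged from the virtio_pmem probe in this series */
	struct nd_region_desc ndr_desc = { };

	ndr_desc.res = &res;			/* guest-physical pmem range */
	ndr_desc.flush = async_pmem_flush;	/* the new nd_region->flush hook */
	set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
	set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
	nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc);

Marking the region ND_REGION_ASYNC is what keeps the resulting dax_device non-synchronous, which in turn trips the MAP_SYNC gate sketched near the top of this page.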