Skip to content

Commit

Permalink
Merge tag 'for-linus-2019-08-17' of git://git.kernel.dk/linux-block
Browse files Browse the repository at this point in the history
Pull block fixes from Jens Axboe:
 "A collection of fixes that should go into this series. This contains:

   - Revert of the REQ_NOWAIT_INLINE and associated dio changes. There
     were still corner cases there, and even though I had a solution for
     it, it's too involved for this stage. (me)

   - Set of NVMe fixes (via Sagi)

   - io_uring fix for fixed buffers (Anthony)

   - io_uring defer issue fix (Jackie)

   - Regression fix for queue sync at exit time (zhengbin)

   - xen blk-back memory leak fix (Wenwen)"

* tag 'for-linus-2019-08-17' of git://git.kernel.dk/linux-block:
  io_uring: fix an issue when IOSQE_IO_LINK is inserted into defer list
  block: remove REQ_NOWAIT_INLINE
  io_uring: fix manual setup of iov_iter for fixed buffers
  xen/blkback: fix memory leaks
  blk-mq: move cancel of requeue_work to the front of blk_exit_queue
  nvme-pci: Fix async probe remove race
  nvme: fix controller removal race with scan work
  nvme-rdma: fix possible use-after-free in connect error flow
  nvme: fix a possible deadlock when passthru commands sent to a multipath device
  nvme-core: Fix extra device_put() call on error path
  nvmet-file: fix nvmet_file_flush() always returning an error
  nvmet-loop: Flush nvme_delete_wq when removing the port
  nvmet: Fix use-after-free bug when a port is removed
  nvme-multipath: revalidate nvme_ns_head gendisk in nvme_validate_ns
  • Loading branch information
torvalds committed Aug 18, 2019
2 parents 85d8d3b + a982eeb commit 8fde283
Show file tree
Hide file tree
Showing 15 changed files with 167 additions and 84 deletions.
10 changes: 2 additions & 8 deletions block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -1958,13 +1958,9 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
rq = blk_mq_get_request(q, bio, &data);
if (unlikely(!rq)) {
rq_qos_cleanup(q, bio);

cookie = BLK_QC_T_NONE;
if (bio->bi_opf & REQ_NOWAIT_INLINE)
cookie = BLK_QC_T_EAGAIN;
else if (bio->bi_opf & REQ_NOWAIT)
if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
return cookie;
return BLK_QC_T_NONE;
}

trace_block_getrq(q, bio, bio->bi_opf);
Expand Down Expand Up @@ -2666,8 +2662,6 @@ void blk_mq_release(struct request_queue *q)
struct blk_mq_hw_ctx *hctx, *next;
int i;

cancel_delayed_work_sync(&q->requeue_work);

queue_for_each_hw_ctx(q, hctx, i)
WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));

Expand Down
3 changes: 3 additions & 0 deletions block/blk-sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -892,6 +892,9 @@ static void __blk_release_queue(struct work_struct *work)

blk_free_queue_stats(q->stats);

if (queue_is_mq(q))
cancel_delayed_work_sync(&q->requeue_work);

blk_exit_queue(q);

blk_queue_free_zone_bitmaps(q);
Expand Down
6 changes: 3 additions & 3 deletions drivers/block/xen-blkback/xenbus.c
Original file line number Diff line number Diff line change
Expand Up @@ -965,6 +965,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
}
}

err = -ENOMEM;
for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (!req)
Expand All @@ -987,7 +988,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
if (err) {
xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
return err;
goto fail;
}

return 0;
Expand All @@ -1007,8 +1008,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
}
kfree(req);
}
return -ENOMEM;

return err;
}

static int connect_ring(struct backend_info *be)
Expand Down
15 changes: 14 additions & 1 deletion drivers/nvme/host/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1286,6 +1286,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
*/
if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
mutex_lock(&ctrl->scan_lock);
mutex_lock(&ctrl->subsys->lock);
nvme_mpath_start_freeze(ctrl->subsys);
nvme_mpath_wait_freeze(ctrl->subsys);
nvme_start_freeze(ctrl);
nvme_wait_freeze(ctrl);
}
Expand Down Expand Up @@ -1316,6 +1319,8 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
nvme_update_formats(ctrl);
if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
nvme_unfreeze(ctrl);
nvme_mpath_unfreeze(ctrl->subsys);
mutex_unlock(&ctrl->subsys->lock);
mutex_unlock(&ctrl->scan_lock);
}
if (effects & NVME_CMD_EFFECTS_CCC)
Expand Down Expand Up @@ -1715,6 +1720,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->head->disk) {
nvme_update_disk_info(ns->head->disk, ns, id);
blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
revalidate_disk(ns->head->disk);
}
#endif
}
Expand Down Expand Up @@ -2487,6 +2493,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
if (ret) {
dev_err(ctrl->device,
"failed to register subsystem device.\n");
put_device(&subsys->dev);
goto out_unlock;
}
ida_init(&subsys->ns_ida);
Expand All @@ -2509,7 +2516,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
nvme_put_subsystem(subsys);
out_unlock:
mutex_unlock(&nvme_subsystems_lock);
put_device(&subsys->dev);
return ret;
}

Expand Down Expand Up @@ -3571,6 +3577,13 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
struct nvme_ns *ns, *next;
LIST_HEAD(ns_list);

/*
* make sure to requeue I/O to all namespaces as these
* might result from the scan itself and must complete
* for the scan_work to make progress
*/
nvme_mpath_clear_ctrl_paths(ctrl);

/* prevent racing with ns scanning */
flush_work(&ctrl->scan_work);

Expand Down
76 changes: 70 additions & 6 deletions drivers/nvme/host/multipath.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,36 @@ module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
"turn on native support for multiple controllers per subsystem");

void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
{
struct nvme_ns_head *h;

lockdep_assert_held(&subsys->lock);
list_for_each_entry(h, &subsys->nsheads, entry)
if (h->disk)
blk_mq_unfreeze_queue(h->disk->queue);
}

void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
{
struct nvme_ns_head *h;

lockdep_assert_held(&subsys->lock);
list_for_each_entry(h, &subsys->nsheads, entry)
if (h->disk)
blk_mq_freeze_queue_wait(h->disk->queue);
}

void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
{
struct nvme_ns_head *h;

lockdep_assert_held(&subsys->lock);
list_for_each_entry(h, &subsys->nsheads, entry)
if (h->disk)
blk_freeze_queue_start(h->disk->queue);
}

/*
* If multipathing is enabled we need to always use the subsystem instance
* number for numbering our devices to avoid conflicts between subsystems that
Expand Down Expand Up @@ -104,18 +134,34 @@ static const char *nvme_ana_state_names[] = {
[NVME_ANA_CHANGE] = "change",
};

void nvme_mpath_clear_current_path(struct nvme_ns *ns)
bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
struct nvme_ns_head *head = ns->head;
bool changed = false;
int node;

if (!head)
return;
goto out;

for_each_node(node) {
if (ns == rcu_access_pointer(head->current_path[node]))
if (ns == rcu_access_pointer(head->current_path[node])) {
rcu_assign_pointer(head->current_path[node], NULL);
changed = true;
}
}
out:
return changed;
}

void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;

mutex_lock(&ctrl->scan_lock);
list_for_each_entry(ns, &ctrl->namespaces, list)
if (nvme_mpath_clear_current_path(ns))
kblockd_schedule_work(&ns->head->requeue_work);
mutex_unlock(&ctrl->scan_lock);
}

static bool nvme_path_is_disabled(struct nvme_ns *ns)
Expand Down Expand Up @@ -226,6 +272,24 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
return ns;
}

/*
 * Return true if at least one namespace on @head's sibling list belongs to a
 * controller in the LIVE, RESETTING or CONNECTING state; false otherwise.
 *
 * NOTE(review): the list is walked with list_for_each_entry_rcu(), so the
 * caller is presumably inside the matching read-side critical section;
 * confirm against the call site.
 */
static bool nvme_available_path(struct nvme_ns_head *head)
{
	struct nvme_ns *path;

	list_for_each_entry_rcu(path, &head->list, siblings) {
		switch (path->ctrl->state) {
		case NVME_CTRL_LIVE:
		case NVME_CTRL_RESETTING:
		case NVME_CTRL_CONNECTING:
			/* all three states count as an available path */
			return true;
		default:
			/* any other state: keep looking at the next sibling */
			continue;
		}
	}

	return false;
}

static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
struct bio *bio)
{
Expand All @@ -252,14 +316,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
disk_devt(ns->head->disk),
bio->bi_iter.bi_sector);
ret = direct_make_request(bio);
} else if (!list_empty_careful(&head->list)) {
dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
} else if (nvme_available_path(head)) {
dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");

spin_lock_irq(&head->requeue_lock);
bio_list_add(&head->requeue_list, bio);
spin_unlock_irq(&head->requeue_lock);
} else {
dev_warn_ratelimited(dev, "no path - failing I/O\n");
dev_warn_ratelimited(dev, "no available path - failing I/O\n");

bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
Expand Down
21 changes: 19 additions & 2 deletions drivers/nvme/host/nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,9 @@ static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
return ctrl->ana_log_buf != NULL;
}

void nvme_mpath_unfreeze(struct nvme_subsystem *subsys);
void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
struct nvme_ctrl *ctrl, int *flags);
void nvme_failover_req(struct request *req);
Expand All @@ -500,7 +503,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
void nvme_mpath_clear_current_path(struct nvme_ns *ns);
bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);

static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
Expand Down Expand Up @@ -548,7 +552,11 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
}
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
return false;
}
static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
}
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
Expand All @@ -568,6 +576,15 @@ static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
{
}
/*
 * Empty inline stand-ins for the subsystem freeze helpers.
 * NOTE(review): these appear to sit in the branch compiled when
 * CONFIG_NVME_MULTIPATH is disabled (see the #endif that follows); confirm
 * against the full header.
 */
static inline void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
{
}
static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
{
}
static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
{
}
#endif /* CONFIG_NVME_MULTIPATH */

#ifdef CONFIG_NVM
Expand Down
3 changes: 2 additions & 1 deletion drivers/nvme/host/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -2695,7 +2695,7 @@ static void nvme_async_probe(void *data, async_cookie_t cookie)
{
struct nvme_dev *dev = data;

nvme_reset_ctrl_sync(&dev->ctrl);
flush_work(&dev->ctrl.reset_work);
flush_work(&dev->ctrl.scan_work);
nvme_put_ctrl(&dev->ctrl);
}
Expand Down Expand Up @@ -2761,6 +2761,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)

dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));

nvme_reset_ctrl(&dev->ctrl);
nvme_get_ctrl(&dev->ctrl);
async_schedule(nvme_async_probe, dev);

Expand Down
16 changes: 11 additions & 5 deletions drivers/nvme/host/rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -562,13 +562,17 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
return ret;
}

/*
 * Unconditionally tear down @queue's connection: disconnect its CM id, then
 * drain its queue pair.  Unlike nvme_rdma_stop_queue(), this does not test or
 * clear NVME_RDMA_Q_LIVE.
 */
static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
rdma_disconnect(queue->cm_id);
/* drain only after the disconnect has been issued */
ib_drain_qp(queue->qp);
}

static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
return;

rdma_disconnect(queue->cm_id);
ib_drain_qp(queue->qp);
__nvme_rdma_stop_queue(queue);
}

static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
Expand Down Expand Up @@ -607,11 +611,13 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
else
ret = nvmf_connect_admin_queue(&ctrl->ctrl);

if (!ret)
if (!ret) {
set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
else
} else {
__nvme_rdma_stop_queue(queue);
dev_info(ctrl->ctrl.device,
"failed to connect queue: %d ret=%d\n", idx, ret);
}
return ret;
}

Expand Down
1 change: 1 addition & 0 deletions drivers/nvme/target/configfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,7 @@ static void nvmet_port_subsys_drop_link(struct config_item *parent,

found:
list_del(&p->entry);
nvmet_port_del_ctrls(port, subsys);
nvmet_port_disc_changed(port, subsys);

if (list_empty(&port->subsystems))
Expand Down
15 changes: 15 additions & 0 deletions drivers/nvme/target/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
u16 status;

switch (errno) {
case 0:
status = NVME_SC_SUCCESS;
break;
case -ENOSPC:
req->error_loc = offsetof(struct nvme_rw_command, length);
status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
Expand Down Expand Up @@ -280,6 +283,18 @@ void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

/*
 * Invoke the transport's delete_ctrl() op on every controller of @subsys
 * that is bound to @port.  The controller list is walked with subsys->lock
 * held.
 */
void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *c;

	mutex_lock(&subsys->lock);
	list_for_each_entry(c, &subsys->ctrls, subsys_entry) {
		/* controllers on other ports are left alone */
		if (c->port != port)
			continue;
		c->ops->delete_ctrl(c);
	}
	mutex_unlock(&subsys->lock);
}

int nvmet_enable_port(struct nvmet_port *port)
{
const struct nvmet_fabrics_ops *ops;
Expand Down
8 changes: 8 additions & 0 deletions drivers/nvme/target/loop.c
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,14 @@ static void nvme_loop_remove_port(struct nvmet_port *port)
mutex_lock(&nvme_loop_ports_mutex);
list_del_init(&port->entry);
mutex_unlock(&nvme_loop_ports_mutex);

/*
* Ensure any ctrls that are in the process of being
* deleted are in fact deleted before we return
* and free the port. This is to prevent active
* ctrls from using a port after it's freed.
*/
flush_workqueue(nvme_delete_wq);
}

static const struct nvmet_fabrics_ops nvme_loop_ops = {
Expand Down
Loading

0 comments on commit 8fde283

Please sign in to comment.