Skip to content

Commit

Permalink
Merge tag 'for-linus-20180623' of git://git.kernel.dk/linux-block
Browse files Browse the repository at this point in the history
Pull block fixes from Jens Axboe:

 - Further timeout fixes. We aren't quite there yet, so expect another
   round of fixes for that to completely close some of the IRQ vs
   completion races. (Christoph/Bart)

 - Set of NVMe fixes from the usual suspects, mostly error handling

 - Two off-by-one fixes (Dan)

 - Another bdi race fix (Jan)

 - Fix nbd reconfigure with NBD_DISCONNECT_ON_CLOSE (Doron)

* tag 'for-linus-20180623' of git://git.kernel.dk/linux-block:
  blk-mq: Fix timeout handling in case the timeout handler returns BLK_EH_DONE
  bdi: Fix another oops in wb_workfn()
  lightnvm: Remove depends on HAS_DMA in case of platform dependency
  nvme-pci: limit max IO size and segments to avoid high order allocations
  nvme-pci: move nvme_kill_queues to nvme_remove_dead_ctrl
  nvme-fc: release io queues to allow fast fail
  nbd: Add the nbd NBD_DISCONNECT_ON_CLOSE config flag.
  block: sed-opal: Fix a couple off by one bugs
  blk-mq-debugfs: Off by one in blk_mq_rq_state_name()
  nvmet: reset keep alive timer in controller enable
  nvme-rdma: don't override opts->queue_size
  nvme-rdma: Fix command completion race at error recovery
  nvme-rdma: fix possible free of a non-allocated async event buffer
  nvme-rdma: fix possible double free condition when failing to create a controller
  Revert "block: Add warning for bi_next not NULL in bio_endio()"
  block: fix timeout changes for legacy request drivers
  • Loading branch information
torvalds committed Jun 23, 2018
2 parents 2dd3f7c + f5e350f commit 77072ca
Show file tree
Hide file tree
Showing 20 changed files with 142 additions and 84 deletions.
3 changes: 0 additions & 3 deletions block/bio.c
Original file line number Diff line number Diff line change
Expand Up @@ -1807,9 +1807,6 @@ void bio_endio(struct bio *bio)
if (!bio_integrity_endio(bio))
return;

if (WARN_ONCE(bio->bi_next, "driver left bi_next not NULL"))
bio->bi_next = NULL;

/*
* Need to have a real endio function for chained bios, otherwise
* various corner cases will break (like stacking block devices that
Expand Down
8 changes: 1 addition & 7 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -273,10 +273,6 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
bio_advance(bio, nbytes);

/* don't actually finish bio if it's part of flush sequence */
/*
* XXX this code looks suspicious - it's not consistent with advancing
* req->bio in caller
*/
if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
bio_endio(bio);
}
Expand Down Expand Up @@ -3081,10 +3077,8 @@ bool blk_update_request(struct request *req, blk_status_t error,
struct bio *bio = req->bio;
unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);

if (bio_bytes == bio->bi_iter.bi_size) {
if (bio_bytes == bio->bi_iter.bi_size)
req->bio = bio->bi_next;
bio->bi_next = NULL;
}

/* Completion has already been traced */
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
Expand Down
2 changes: 1 addition & 1 deletion block/blk-mq-debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ static const char *const blk_mq_rq_state_name_array[] = {

static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state)
{
if (WARN_ON_ONCE((unsigned int)rq_state >
if (WARN_ON_ONCE((unsigned int)rq_state >=
ARRAY_SIZE(blk_mq_rq_state_name_array)))
return "(?)";
return blk_mq_rq_state_name_array[rq_state];
Expand Down
1 change: 0 additions & 1 deletion block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,6 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
}

req->rq_flags &= ~RQF_TIMED_OUT;
blk_add_timer(req);
}

Expand Down
1 change: 1 addition & 0 deletions block/blk-softirq.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ void __blk_complete_request(struct request *req)

local_irq_restore(flags);
}
EXPORT_SYMBOL(__blk_complete_request);

/**
* blk_complete_request - end I/O on a request
Expand Down
1 change: 1 addition & 0 deletions block/blk-timeout.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ void blk_add_timer(struct request *req)
if (!req->timeout)
req->timeout = q->rq_timeout;

req->rq_flags &= ~RQF_TIMED_OUT;
blk_rq_set_deadline(req, jiffies + req->timeout);

/*
Expand Down
4 changes: 2 additions & 2 deletions block/sed-opal.c
Original file line number Diff line number Diff line change
Expand Up @@ -877,7 +877,7 @@ static size_t response_get_string(const struct parsed_resp *resp, int n,
return 0;
}

if (n > resp->num) {
if (n >= resp->num) {
pr_debug("Response has %d tokens. Can't access %d\n",
resp->num, n);
return 0;
Expand Down Expand Up @@ -916,7 +916,7 @@ static u64 response_get_u64(const struct parsed_resp *resp, int n)
return 0;
}

if (n > resp->num) {
if (n >= resp->num) {
pr_debug("Response has %d tokens. Can't access %d\n",
resp->num, n);
return 0;
Expand Down
42 changes: 34 additions & 8 deletions drivers/block/nbd.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ struct link_dead_args {
#define NBD_HAS_CONFIG_REF 4
#define NBD_BOUND 5
#define NBD_DESTROY_ON_DISCONNECT 6
#define NBD_DISCONNECT_ON_CLOSE 7

struct nbd_config {
u32 flags;
Expand Down Expand Up @@ -138,6 +139,7 @@ static void nbd_config_put(struct nbd_device *nbd);
static void nbd_connect_reply(struct genl_info *info, int index);
static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
static void nbd_dead_link_work(struct work_struct *work);
static void nbd_disconnect_and_put(struct nbd_device *nbd);

static inline struct device *nbd_to_dev(struct nbd_device *nbd)
{
Expand Down Expand Up @@ -1305,6 +1307,12 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
static void nbd_release(struct gendisk *disk, fmode_t mode)
{
struct nbd_device *nbd = disk->private_data;
struct block_device *bdev = bdget_disk(disk, 0);

if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
bdev->bd_openers == 0)
nbd_disconnect_and_put(nbd);

nbd_config_put(nbd);
nbd_put(nbd);
}
Expand Down Expand Up @@ -1705,6 +1713,10 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
&config->runtime_flags);
put_dev = true;
}
if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
set_bit(NBD_DISCONNECT_ON_CLOSE,
&config->runtime_flags);
}
}

if (info->attrs[NBD_ATTR_SOCKETS]) {
Expand Down Expand Up @@ -1749,6 +1761,17 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
return ret;
}

static void nbd_disconnect_and_put(struct nbd_device *nbd)
{
mutex_lock(&nbd->config_lock);
nbd_disconnect(nbd);
nbd_clear_sock(nbd);
mutex_unlock(&nbd->config_lock);
if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
}

static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
{
struct nbd_device *nbd;
Expand Down Expand Up @@ -1781,13 +1804,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
nbd_put(nbd);
return 0;
}
mutex_lock(&nbd->config_lock);
nbd_disconnect(nbd);
nbd_clear_sock(nbd);
mutex_unlock(&nbd->config_lock);
if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
nbd_disconnect_and_put(nbd);
nbd_config_put(nbd);
nbd_put(nbd);
return 0;
Expand All @@ -1798,7 +1815,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
struct nbd_device *nbd = NULL;
struct nbd_config *config;
int index;
int ret = -EINVAL;
int ret = 0;
bool put_dev = false;

if (!netlink_capable(skb, CAP_SYS_ADMIN))
Expand Down Expand Up @@ -1838,6 +1855,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
!nbd->task_recv) {
dev_err(nbd_to_dev(nbd),
"not configured, cannot reconfigure\n");
ret = -EINVAL;
goto out;
}

Expand All @@ -1862,6 +1880,14 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
&config->runtime_flags))
refcount_inc(&nbd->refs);
}

if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
set_bit(NBD_DISCONNECT_ON_CLOSE,
&config->runtime_flags);
} else {
clear_bit(NBD_DISCONNECT_ON_CLOSE,
&config->runtime_flags);
}
}

if (info->attrs[NBD_ATTR_SOCKETS]) {
Expand Down
2 changes: 1 addition & 1 deletion drivers/block/null_blk.c
Original file line number Diff line number Diff line change
Expand Up @@ -1365,7 +1365,7 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq)
{
pr_info("null: rq %p timed out\n", rq);
blk_mq_complete_request(rq);
__blk_complete_request(rq);
return BLK_EH_DONE;
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/lightnvm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

menuconfig NVM
bool "Open-Channel SSD target support"
depends on BLOCK && HAS_DMA && PCI
depends on BLOCK && PCI
select BLK_DEV_NVME
help
Say Y here to get to enable Open-channel SSDs.
Expand Down
1 change: 1 addition & 0 deletions drivers/nvme/host/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1808,6 +1808,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
u32 max_segments =
(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;

max_segments = min_not_zero(max_segments, ctrl->max_segments);
blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
}
Expand Down
6 changes: 3 additions & 3 deletions drivers/nvme/host/fc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2790,6 +2790,9 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
/* re-enable the admin_q so anything new can fast fail */
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);

/* resume the io queues so that things will fast fail */
nvme_start_queues(&ctrl->ctrl);

nvme_fc_ctlr_inactive_on_rport(ctrl);
}

Expand All @@ -2804,9 +2807,6 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
* waiting for io to terminate
*/
nvme_fc_delete_association(ctrl);

/* resume the io queues so that things will fast fail */
nvme_start_queues(nctrl);
}

static void
Expand Down
1 change: 1 addition & 0 deletions drivers/nvme/host/nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ struct nvme_ctrl {
u64 cap;
u32 page_size;
u32 max_hw_sectors;
u32 max_segments;
u16 oncs;
u16 oacs;
u16 nssa;
Expand Down
44 changes: 38 additions & 6 deletions drivers/nvme/host/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@

#define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc))

/*
* These can be higher, but we need to ensure that any command doesn't
* require an sg allocation that needs more than a page of data.
*/
#define NVME_MAX_KB_SZ 4096
#define NVME_MAX_SEGS 127

static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);

Expand Down Expand Up @@ -100,6 +107,8 @@ struct nvme_dev {
struct nvme_ctrl ctrl;
struct completion ioq_wait;

mempool_t *iod_mempool;

/* shadow doorbell buffer support: */
u32 *dbbuf_dbs;
dma_addr_t dbbuf_dbs_dma_addr;
Expand Down Expand Up @@ -477,10 +486,7 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
iod->use_sgl = nvme_pci_use_sgls(dev, rq);

if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
iod->use_sgl);

iod->sg = kmalloc(alloc_size, GFP_ATOMIC);
iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
if (!iod->sg)
return BLK_STS_RESOURCE;
} else {
Expand Down Expand Up @@ -526,7 +532,7 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
}

if (iod->sg != iod->inline_sg)
kfree(iod->sg);
mempool_free(iod->sg, dev->iod_mempool);
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
Expand Down Expand Up @@ -2280,6 +2286,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues);
free_opal_dev(dev->ctrl.opal_dev);
mempool_destroy(dev->iod_mempool);
kfree(dev);
}

Expand All @@ -2289,6 +2296,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)

nvme_get_ctrl(&dev->ctrl);
nvme_dev_disable(dev, false);
nvme_kill_queues(&dev->ctrl);
if (!queue_work(nvme_wq, &dev->remove_work))
nvme_put_ctrl(&dev->ctrl);
}
Expand Down Expand Up @@ -2333,6 +2341,13 @@ static void nvme_reset_work(struct work_struct *work)
if (result)
goto out;

/*
* Limit the max command size to prevent iod->sg allocations going
* over a single page.
*/
dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
dev->ctrl.max_segments = NVME_MAX_SEGS;

result = nvme_init_identify(&dev->ctrl);
if (result)
goto out;
Expand Down Expand Up @@ -2405,7 +2420,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
struct pci_dev *pdev = to_pci_dev(dev->dev);

nvme_kill_queues(&dev->ctrl);
if (pci_get_drvdata(pdev))
device_release_driver(&pdev->dev);
nvme_put_ctrl(&dev->ctrl);
Expand Down Expand Up @@ -2509,6 +2523,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
int node, result = -ENOMEM;
struct nvme_dev *dev;
unsigned long quirks = id->driver_data;
size_t alloc_size;

node = dev_to_node(&pdev->dev);
if (node == NUMA_NO_NODE)
Expand Down Expand Up @@ -2546,6 +2561,23 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result)
goto release_pools;

/*
* Double check that our mempool alloc size will cover the biggest
* command we support.
*/
alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ,
NVME_MAX_SEGS, true);
WARN_ON_ONCE(alloc_size > PAGE_SIZE);

dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
mempool_kfree,
(void *) alloc_size,
GFP_KERNEL, node);
if (!dev->iod_mempool) {
result = -ENOMEM;
goto release_pools;
}

dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));

nvme_get_ctrl(&dev->ctrl);
Expand Down
Loading

0 comments on commit 77072ca

Please sign in to comment.