Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "A selection of fixes/changes that should make it into this series.
  This contains:

   - NVMe, two merges, containing:
        - pci-e, rdma, and fc fixes
        - Device quirks

   - Fix for a badblocks leak in null_blk

   - bcache fix from Rui Hua for a race condition regression where
     -EINTR was returned to upper layers that didn't expect it.

   - Regression fix for blktrace for a bug introduced in this series.

   - blktrace cleanup for cgroup id.

   - bdi registration error handling.

   - Small series with cleanups for blk-wbt.

   - Various little fixes for typos and the like.

  Nothing earth shattering, most important are the NVMe and bcache fixes"

* 'for-linus' of git://git.kernel.dk/linux-block: (34 commits)
  nvme-pci: fix NULL pointer dereference in nvme_free_host_mem()
  nvme-rdma: fix memory leak during queue allocation
  blktrace: fix trace mutex deadlock
  nvme-rdma: Use mr pool
  nvme-rdma: Check remotely invalidated rkey matches our expected rkey
  nvme-rdma: wait for local invalidation before completing a request
  nvme-rdma: don't complete requests before a send work request has completed
  nvme-rdma: don't suppress send completions
  bcache: check return value of register_shrinker
  bcache: recover data from backing when data is clean
  bcache: Fix building error on MIPS
  bcache: add a comment in journal bucket reading
  nvme-fc: don't use bit masks for set/test_bit() numbers
  blk-wbt: fix comments typo
  blk-wbt: move wbt_clear_stat to common place in wbt_done
  blk-sysfs: remove NULL pointer checking in queue_wb_lat_store
  blk-wbt: remove duplicated setting in wbt_init
  nvme-pci: add quirk for delay before CHK RDY for WDC SN200
  block: remove useless assignment in bio_split
  null_blk: fix dev->badblocks leak
  ...
torvalds committed Dec 1, 2017
2 parents df8ba95 + ed56537 commit 75f64f6
Showing 22 changed files with 291 additions and 211 deletions.
2 changes: 1 addition & 1 deletion block/bio.c
@@ -1819,7 +1819,7 @@ EXPORT_SYMBOL(bio_endio);
 struct bio *bio_split(struct bio *bio, int sectors,
 		      gfp_t gfp, struct bio_set *bs)
 {
-	struct bio *split = NULL;
+	struct bio *split;
 
 	BUG_ON(sectors <= 0);
 	BUG_ON(sectors >= bio_sectors(bio));
5 changes: 1 addition & 4 deletions block/blk-sysfs.c
@@ -450,12 +450,9 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
 		ret = wbt_init(q);
 		if (ret)
 			return ret;
-
-		rwb = q->rq_wb;
-		if (!rwb)
-			return -EINVAL;
 	}
 
+	rwb = q->rq_wb;
 	if (val == -1)
 		rwb->min_lat_nsec = wbt_default_latency_nsec(q);
 	else if (val >= 0)
7 changes: 2 additions & 5 deletions block/blk-wbt.c
@@ -178,12 +178,11 @@ void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat)
 
 		if (wbt_is_read(stat))
 			wb_timestamp(rwb, &rwb->last_comp);
-		wbt_clear_state(stat);
 	} else {
 		WARN_ON_ONCE(stat == rwb->sync_cookie);
 		__wbt_done(rwb, wbt_stat_to_mask(stat));
-		wbt_clear_state(stat);
 	}
+	wbt_clear_state(stat);
 }
 
 /*
@@ -482,7 +481,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 
 	/*
 	 * At this point we know it's a buffered write. If this is
-	 * kswapd trying to free memory, or REQ_SYNC is set, set, then
+	 * kswapd trying to free memory, or REQ_SYNC is set, then
 	 * it's WB_SYNC_ALL writeback, and we'll use the max limit for
 	 * that. If the write is marked as a background write, then use
 	 * the idle limit, or go to normal if we haven't had competing
@@ -723,8 +722,6 @@ int wbt_init(struct request_queue *q)
 		init_waitqueue_head(&rwb->rq_wait[i].wait);
 	}
 
-	rwb->wc = 1;
-	rwb->queue_depth = RWB_DEF_DEPTH;
 	rwb->last_comp = rwb->last_issue = jiffies;
 	rwb->queue = q;
 	rwb->win_nsec = RWB_WINDOW_NSEC;
9 changes: 6 additions & 3 deletions block/genhd.c
@@ -671,10 +671,13 @@ void device_add_disk(struct device *parent, struct gendisk *disk)
 		disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
 		disk->flags |= GENHD_FL_NO_PART_SCAN;
 	} else {
+		int ret;
+
 		/* Register BDI before referencing it from bdev */
 		disk_to_dev(disk)->devt = devt;
-		bdi_register_owner(disk->queue->backing_dev_info,
-				   disk_to_dev(disk));
+		ret = bdi_register_owner(disk->queue->backing_dev_info,
+					 disk_to_dev(disk));
+		WARN_ON(ret);
 		blk_register_region(disk_devt(disk), disk->minors, NULL,
 				    exact_match, exact_lock, disk);
 	}
@@ -1389,7 +1392,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
 
 	if (minors > DISK_MAX_PARTS) {
 		printk(KERN_ERR
-			"block: can't allocated more than %d partitions\n",
+			"block: can't allocate more than %d partitions\n",
 			DISK_MAX_PARTS);
 		minors = DISK_MAX_PARTS;
 	}
5 changes: 4 additions & 1 deletion drivers/block/null_blk.c
@@ -471,7 +471,6 @@ static void nullb_device_release(struct config_item *item)
 {
 	struct nullb_device *dev = to_nullb_device(item);
 
-	badblocks_exit(&dev->badblocks);
 	null_free_device_storage(dev, false);
 	null_free_dev(dev);
 }
@@ -582,6 +581,10 @@ static struct nullb_device *null_alloc_dev(void)
 
 static void null_free_dev(struct nullb_device *dev)
 {
+	if (!dev)
+		return;
+
+	badblocks_exit(&dev->badblocks);
 	kfree(dev);
 }
 
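Side note on the null_blk fix above: the leak happened because badblocks_exit() was called on only one teardown path, so a device freed through any other path leaked its badblocks allocation. Moving the call into null_free_dev() (with a NULL guard) puts the cleanup next to the free, so every path releases it. A minimal user-space sketch of that pattern — hypothetical names, not kernel code:

/* Tear down an embedded resource inside the single free routine, so
 * every caller path (normal release, error unwind) releases it once. */
#include <stdio.h>
#include <stdlib.h>

struct blocklist {              /* stand-in for struct badblocks */
	int *pages;
};

struct device {                 /* stand-in for struct nullb_device */
	struct blocklist bb;
};

static void blocklist_exit(struct blocklist *bl)
{
	free(bl->pages);
	bl->pages = NULL;
}

static void device_free(struct device *dev)
{
	if (!dev)                   /* NULL-safe, like the patched null_free_dev() */
		return;
	blocklist_exit(&dev->bb);   /* freed here, not in one particular caller */
	free(dev);
}

int main(void)
{
	struct device *dev = calloc(1, sizeof(*dev));

	if (!dev)
		return 1;
	dev->bb.pages = calloc(16, sizeof(int));
	device_free(dev);
	return 0;
}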
2 changes: 1 addition & 1 deletion drivers/md/bcache/alloc.c
@@ -490,7 +490,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
 		if (b == -1)
 			goto err;
 
-		k->ptr[i] = PTR(ca->buckets[b].gen,
+		k->ptr[i] = MAKE_PTR(ca->buckets[b].gen,
 				bucket_to_sector(c, b),
 				ca->sb.nr_this_dev);
 
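The PTR -> MAKE_PTR rename here (and in extents.c and journal.c below) is the "bcache: Fix building error on MIPS" change from the shortlog: presumably the generically named PTR() macro collided with an identically named macro pulled in by the MIPS arch headers, and the prefixed name sidesteps the clash. A hypothetical stand-alone illustration — the bit layout below is made up, not bcache's real encoding:

/* If a platform header already defines PTR, a driver-local PTR macro is
 * a redefinition error (or silently changes meaning with include order).
 * A prefixed name cannot collide. */
#include <stdio.h>

#define PTR 0x8000      /* stand-in for an arch-header macro */

#define MAKE_PTR(gen, offset, dev) \
	(((unsigned long long)(gen) << 48) | ((offset) << 8) | (dev))

int main(void)
{
	printf("0x%llx\n", MAKE_PTR(3, 4096, 1));
	return 0;
}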
5 changes: 4 additions & 1 deletion drivers/md/bcache/btree.c
@@ -807,7 +807,10 @@ int bch_btree_cache_alloc(struct cache_set *c)
 	c->shrink.scan_objects = bch_mca_scan;
 	c->shrink.seeks = 4;
 	c->shrink.batch = c->btree_pages * 2;
-	register_shrinker(&c->shrink);
+
+	if (register_shrinker(&c->shrink))
+		pr_warn("bcache: %s: could not register shrinker",
+			__func__);
 
 	return 0;
 }
2 changes: 1 addition & 1 deletion drivers/md/bcache/extents.c
@@ -585,7 +585,7 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey
 		return false;
 
 	for (i = 0; i < KEY_PTRS(l); i++)
-		if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
+		if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
 		    PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
 			return false;
 
7 changes: 6 additions & 1 deletion drivers/md/bcache/journal.c
@@ -170,6 +170,11 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
 	 * find a sequence of buckets with valid journal entries
 	 */
 	for (i = 0; i < ca->sb.njournal_buckets; i++) {
+		/*
+		 * We must try the index l with ZERO first for
+		 * correctness due to the scenario that the journal
+		 * bucket is circular buffer which might have wrapped
+		 */
 		l = (i * 2654435769U) % ca->sb.njournal_buckets;
 
 		if (test_bit(l, bitmap))
@@ -507,7 +512,7 @@ static void journal_reclaim(struct cache_set *c)
 			continue;
 
 		ja->cur_idx = next;
-		k->ptr[n++] = PTR(0,
+		k->ptr[n++] = MAKE_PTR(0,
 				bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
 				ca->sb.nr_this_dev);
 	}
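The multiplier 2654435769 in the first hunk above is the 32-bit golden-ratio constant (roughly 2^32/phi), i.e. Fibonacci hashing: successive values of i scatter the probes across the journal buckets instead of scanning linearly, while i == 0 still probes bucket 0 first — which the newly added comment says is required for correctness when the circular journal has wrapped. A small user-space sketch (bucket count of 256 assumed for illustration) that prints the probe order:

#include <stdio.h>

int main(void)
{
	unsigned int njournal_buckets = 256;    /* hypothetical bucket count */
	unsigned int i;

	for (i = 0; i < 8; i++) {
		unsigned int l = (i * 2654435769U) % njournal_buckets;

		printf("probe %u -> bucket %u\n", i, l);
	}
	return 0;   /* probes buckets 0, 185, 114, 43, 228, 157, 86, 15 */
}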
13 changes: 6 additions & 7 deletions drivers/md/bcache/request.c
@@ -708,16 +708,15 @@ static void cached_dev_read_error(struct closure *cl)
 {
 	struct search *s = container_of(cl, struct search, cl);
 	struct bio *bio = &s->bio.bio;
-	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
 	/*
-	 * If cache device is dirty (dc->has_dirty is non-zero), then
-	 * recovery a failed read request from cached device may get a
-	 * stale data back. So read failure recovery is only permitted
-	 * when cache device is clean.
+	 * If read request hit dirty data (s->read_dirty_data is true),
+	 * then recovery a failed read request from cached device may
+	 * get a stale data back. So read failure recovery is only
+	 * permitted when read request hit clean data in cache device,
+	 * or when cache read race happened.
 	 */
-	if (s->recoverable &&
-	    (dc && !atomic_read(&dc->has_dirty))) {
+	if (s->recoverable && !s->read_dirty_data) {
 		/* Retry from the backing device: */
 		trace_bcache_read_retry(s->orig_bio);
 
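The request.c change swaps a device-wide predicate for a per-request one: the old dc->has_dirty test blocked recovery for every failed read as soon as any dirty data existed anywhere in the cache, while s->read_dirty_data blocks recovery only when this particular read actually touched dirty cache data. A user-space sketch (hypothetical helper names) showing the two rules diverge exactly in the case the patch cares about:

#include <stdbool.h>
#include <stdio.h>

/* old rule: recover only if the WHOLE cache device is clean */
static bool may_recover_old(bool recoverable, bool dev_has_dirty)
{
	return recoverable && !dev_has_dirty;
}

/* new rule: recover unless THIS read hit dirty cache data */
static bool may_recover_new(bool recoverable, bool read_dirty_data)
{
	return recoverable && !read_dirty_data;
}

int main(void)
{
	/* a read that touched only clean data, on a device that has dirty
	 * data elsewhere: the old rule refuses a recovery that is safe */
	printf("old: %d  new: %d\n",
	       may_recover_old(true, true),
	       may_recover_new(true, false));
	return 0;
}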
19 changes: 8 additions & 11 deletions drivers/nvme/host/core.c
@@ -1449,19 +1449,19 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
 	int srcu_idx, ret;
 	u8 data[16] = { 0, };
 
+	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
+	if (unlikely(!ns))
+		return -EWOULDBLOCK;
+
 	put_unaligned_le64(key, &data[0]);
 	put_unaligned_le64(sa_key, &data[8]);
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = op;
-	c.common.nsid = cpu_to_le32(head->ns_id);
+	c.common.nsid = cpu_to_le32(ns->head->ns_id);
 	c.common.cdw10[0] = cpu_to_le32(cdw10);
 
-	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
-	if (unlikely(!ns))
-		ret = -EWOULDBLOCK;
-	else
-		ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16);
+	ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16);
 	nvme_put_ns_from_disk(head, srcu_idx);
 	return ret;
 }
@@ -2961,8 +2961,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 static void nvme_ns_remove(struct nvme_ns *ns)
 {
-	struct nvme_ns_head *head = ns->head;
-
 	if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
 		return;
 
@@ -2980,15 +2978,14 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 
 	mutex_lock(&ns->ctrl->subsys->lock);
 	nvme_mpath_clear_current_path(ns);
-	if (head)
-		list_del_rcu(&ns->siblings);
+	list_del_rcu(&ns->siblings);
 	mutex_unlock(&ns->ctrl->subsys->lock);
 
 	mutex_lock(&ns->ctrl->namespaces_mutex);
 	list_del_init(&ns->list);
 	mutex_unlock(&ns->ctrl->namespaces_mutex);
 
-	synchronize_srcu(&head->srcu);
+	synchronize_srcu(&ns->head->srcu);
 	nvme_put_ns(ns);
 }
 
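The nvme_pr_command() hunk above is an ordering fix: the old code read head->ns_id while head was still uninitialized, because nvme_get_ns_from_disk() — the call that sets head — only ran after the command had been built. The fix performs the lookup first and bails out before touching ns->head. A hypothetical user-space reduction of that bug class (head initialized to NULL here for a deterministic failure):

#include <stdio.h>
#include <stddef.h>

struct head { int ns_id; };

static struct head the_head = { 42 };

static int lookup(struct head **out)
{
	*out = &the_head;       /* *out is only valid after this returns */
	return 0;
}

int main(void)
{
	struct head *head = NULL;

	/* buggy order (what the old code did):
	 *	int nsid = head->ns_id;   <-- dereferences NULL
	 *	lookup(&head);
	 */

	/* fixed order: look up first, check, then use */
	if (lookup(&head))
		return 1;
	printf("nsid=%d\n", head->ns_id);
	return 0;
}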
30 changes: 30 additions & 0 deletions drivers/nvme/host/fabrics.h
@@ -156,4 +156,34 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts);
 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
 
+static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl,
+		struct request *rq)
+{
+	struct nvme_command *cmd = nvme_req(rq)->cmd;
+
+	/*
+	 * We cannot accept any other command until the connect command has
+	 * completed, so only allow connect to pass.
+	 */
+	if (!blk_rq_is_passthrough(rq) ||
+	    cmd->common.opcode != nvme_fabrics_command ||
+	    cmd->fabrics.fctype != nvme_fabrics_type_connect) {
+		/*
+		 * Reconnecting state means transport disruption, which can take
+		 * a long time and even might fail permanently, fail fast to
+		 * give upper layers a chance to failover.
+		 * Deleting state means that the ctrl will never accept commands
+		 * again, fail it permanently.
+		 */
+		if (ctrl->state == NVME_CTRL_RECONNECTING ||
+		    ctrl->state == NVME_CTRL_DELETING) {
+			nvme_req(rq)->status = NVME_SC_ABORT_REQ;
+			return BLK_STS_IOERR;
+		}
+		return BLK_STS_RESOURCE; /* try again later */
+	}
+
+	return BLK_STS_OK;
+}
+
 #endif /* _NVME_FABRICS_H */
21 changes: 19 additions & 2 deletions drivers/nvme/host/fc.c
@@ -31,7 +31,8 @@
 
 
 enum nvme_fc_queue_flags {
-	NVME_FC_Q_CONNECTED = (1 << 0),
+	NVME_FC_Q_CONNECTED = 0,
+	NVME_FC_Q_LIVE,
 };
 
 #define NVMEFC_QUEUE_DELAY	3	/* ms units */
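This enum change is the "nvme-fc: don't use bit masks for set/test_bit() numbers" fix from the shortlog: the set_bit()/test_bit()/test_and_clear_bit() family takes a bit number, not a mask, so NVME_FC_Q_CONNECTED = (1 << 0) made those calls operate on bit 1 instead of bit 0. A self-contained user-space sketch of the pitfall, with toy stand-ins for the kernel's bitops:

#include <stdio.h>

static void set_bit(unsigned int nr, unsigned long *addr)
{
	*addr |= 1UL << nr;     /* nr is a bit NUMBER */
}

static int test_bit(unsigned int nr, const unsigned long *addr)
{
	return (*addr >> nr) & 1;
}

enum {
	Q_CONNECTED_MASK = (1 << 0),    /* wrong kind of constant: a mask */
	Q_CONNECTED = 0,                /* right: a bit number */
};

int main(void)
{
	unsigned long flags = 0;

	set_bit(Q_CONNECTED_MASK, &flags);      /* sets bit 1, not bit 0! */
	printf("mask:   bit0=%d bit1=%d\n", test_bit(0, &flags), test_bit(1, &flags));

	flags = 0;
	set_bit(Q_CONNECTED, &flags);           /* sets bit 0 as intended */
	printf("number: bit0=%d bit1=%d\n", test_bit(0, &flags), test_bit(1, &flags));
	return 0;
}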
@@ -1927,14 +1928,14 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue)
 	if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags))
 		return;
 
+	clear_bit(NVME_FC_Q_LIVE, &queue->flags);
 	/*
 	 * Current implementation never disconnects a single queue.
 	 * It always terminates a whole association. So there is never
 	 * a disconnect(queue) LS sent to the target.
 	 */
 
 	queue->connection_id = 0;
-	clear_bit(NVME_FC_Q_CONNECTED, &queue->flags);
 }
 
 static void
@@ -2013,6 +2014,8 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
 		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
 		if (ret)
 			break;
+
+		set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags);
 	}
 
 	return ret;
@@ -2320,6 +2323,14 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	return BLK_STS_RESOURCE;
 }
 
+static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue,
+		struct request *rq)
+{
+	if (unlikely(!test_bit(NVME_FC_Q_LIVE, &queue->flags)))
+		return nvmf_check_init_req(&queue->ctrl->ctrl, rq);
+	return BLK_STS_OK;
+}
+
 static blk_status_t
 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
@@ -2335,6 +2346,10 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 	u32 data_len;
 	blk_status_t ret;
 
+	ret = nvme_fc_is_ready(queue, rq);
+	if (unlikely(ret))
+		return ret;
+
 	ret = nvme_setup_cmd(ns, rq, sqe);
 	if (ret)
 		return ret;
@@ -2727,6 +2742,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	if (ret)
 		goto out_disconnect_admin_queue;
 
+	set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags);
+
 	/*
 	 * Check controller capabilities
 	 *
2 changes: 1 addition & 1 deletion drivers/nvme/host/multipath.c
@@ -131,7 +131,7 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
 		bio->bi_opf |= REQ_NVME_MPATH;
 		ret = direct_make_request(bio);
 	} else if (!list_empty_careful(&head->list)) {
-		dev_warn_ratelimited(dev, "no path available - requeing I/O\n");
+		dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
 
 		spin_lock_irq(&head->requeue_lock);
 		bio_list_add(&head->requeue_list, bio);
2 changes: 1 addition & 1 deletion drivers/nvme/host/nvme.h
@@ -114,7 +114,7 @@ static inline struct nvme_request *nvme_req(struct request *req)
  * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was
  * found empirically.
  */
-#define NVME_QUIRK_DELAY_AMOUNT		2000
+#define NVME_QUIRK_DELAY_AMOUNT		2300
 
 enum nvme_ctrl_state {
 	NVME_CTRL_NEW,