Skip to content

Commit

Permalink
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Browse files Browse the repository at this point in the history
Pull block fixes from Jens Axboe:
 "Small collection of fixes that would be nice to have in -rc1. This
  contains:

   - NVMe pull request form Christoph, mostly with fixes for nvme-pci,
     host memory buffer in particular.

   - Error handling fixup for cgwb_create(), in case allocation of 'wb'
     fails. From Christophe Jaillet.

   - Ensure that trace_block_getrq() gets the 'dev' in an appropriate
     fashion, to avoid a potential NULL deref. From Greg Thelen.

   - Regression fix for dm-mq with blk-mq, fixing a problem with
     stacking IO schedulers. From me.

   - string.h fixup, fixing an issue with memcpy_and_pad(). This
     original change came in through an NVMe dependency, which is why
     I'm including it here. From Martin Wilck.

   - Fix potential int overflow in __blkdev_sectors_to_bio_pages(), from
     Mikulas.

   - MBR enable fix for sed-opal, from Scott"

* 'for-linus' of git://git.kernel.dk/linux-block:
  block: directly insert blk-mq request from blk_insert_cloned_request()
  mm/backing-dev.c: fix an error handling path in 'cgwb_create()'
  string.h: un-fortify memcpy_and_pad
  nvme-pci: implement the HMB entry number and size limitations
  nvme-pci: propagate (some) errors from host memory buffer setup
  nvme-pci: use appropriate initial chunk size for HMB allocation
  nvme-pci: fix host memory buffer allocation fallback
  nvme: fix lightnvm check
  block: fix integer overflow in __blkdev_sectors_to_bio_pages()
  block: sed-opal: Set MBRDone on S3 resume path if TPER is MBREnabled
  block: tolerate tracing of NULL bio
  • Loading branch information
torvalds committed Sep 13, 2017
2 parents 20e52ee + 157f377 commit 80a0d64
Show file tree
Hide file tree
Showing 14 changed files with 134 additions and 81 deletions.
7 changes: 6 additions & 1 deletion block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2342,7 +2342,12 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
if (q->mq_ops) {
if (blk_queue_io_stat(q))
blk_account_io_start(rq, true);
blk_mq_sched_insert_request(rq, false, true, false, false);
/*
* Since we have a scheduler attached on the top device,
* bypass a potential scheduler on the bottom device for
* insert.
*/
blk_mq_request_bypass_insert(rq);
return BLK_STS_OK;
}

Expand Down
4 changes: 2 additions & 2 deletions block/blk-lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,9 +269,9 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
*/
static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
sector_t bytes = (nr_sects << 9) + PAGE_SIZE - 1;
sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);

return min(bytes >> PAGE_SHIFT, (sector_t)BIO_MAX_PAGES);
return min(pages, (sector_t)BIO_MAX_PAGES);
}

/**
Expand Down
16 changes: 16 additions & 0 deletions block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -1401,6 +1401,22 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
blk_mq_hctx_mark_pending(hctx, ctx);
}

/*
* Should only be used carefully, when the caller knows we want to
* bypass a potential IO scheduler on the target device.
*/
void blk_mq_request_bypass_insert(struct request *rq)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);

spin_lock(&hctx->lock);
list_add_tail(&rq->queuelist, &hctx->dispatch);
spin_unlock(&hctx->lock);

blk_mq_run_hw_queue(hctx, false);
}

void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list)

Expand Down
1 change: 1 addition & 0 deletions block/blk-mq.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
*/
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head);
void blk_mq_request_bypass_insert(struct request *rq);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);

Expand Down
1 change: 1 addition & 0 deletions block/opal_proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ enum opal_response_token {
#define GENERIC_HOST_SESSION_NUM 0x41

#define TPER_SYNC_SUPPORTED 0x01
#define MBR_ENABLED_MASK 0x10

#define TINY_ATOM_DATA_MASK 0x3F
#define TINY_ATOM_SIGNED 0x40
Expand Down
32 changes: 32 additions & 0 deletions block/sed-opal.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ struct parsed_resp {

struct opal_dev {
bool supported;
bool mbr_enabled;

void *data;
sec_send_recv *send_recv;
Expand Down Expand Up @@ -283,6 +284,14 @@ static bool check_tper(const void *data)
return true;
}

static bool check_mbrenabled(const void *data)
{
const struct d0_locking_features *lfeat = data;
u8 sup_feat = lfeat->supported_features;

return !!(sup_feat & MBR_ENABLED_MASK);
}

static bool check_sum(const void *data)
{
const struct d0_single_user_mode *sum = data;
Expand Down Expand Up @@ -417,6 +426,7 @@ static int opal_discovery0_end(struct opal_dev *dev)
u32 hlen = be32_to_cpu(hdr->length);

print_buffer(dev->resp, hlen);
dev->mbr_enabled = false;

if (hlen > IO_BUFFER_LENGTH - sizeof(*hdr)) {
pr_debug("Discovery length overflows buffer (%zu+%u)/%u\n",
Expand All @@ -442,6 +452,8 @@ static int opal_discovery0_end(struct opal_dev *dev)
check_geometry(dev, body);
break;
case FC_LOCKING:
dev->mbr_enabled = check_mbrenabled(body->features);
break;
case FC_ENTERPRISE:
case FC_DATASTORE:
/* some ignored properties */
Expand Down Expand Up @@ -2190,6 +2202,21 @@ static int __opal_lock_unlock(struct opal_dev *dev,
return next(dev);
}

static int __opal_set_mbr_done(struct opal_dev *dev, struct opal_key *key)
{
u8 mbr_done_tf = 1;
const struct opal_step mbrdone_step [] = {
{ opal_discovery0, },
{ start_admin1LSP_opal_session, key },
{ set_mbr_done, &mbr_done_tf },
{ end_opal_session, },
{ NULL, }
};

dev->steps = mbrdone_step;
return next(dev);
}

static int opal_lock_unlock(struct opal_dev *dev,
struct opal_lock_unlock *lk_unlk)
{
Expand Down Expand Up @@ -2345,6 +2372,11 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
suspend->unlk.session.sum);
was_failure = true;
}
if (dev->mbr_enabled) {
ret = __opal_set_mbr_done(dev, &suspend->unlk.session.opal_key);
if (ret)
pr_debug("Failed to set MBR Done in S3 resume\n");
}
}
mutex_unlock(&dev->dev_lock);
return was_failure;
Expand Down
11 changes: 7 additions & 4 deletions drivers/nvme/host/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1897,6 +1897,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->cntlid = le16_to_cpu(id->cntlid);
ctrl->hmpre = le32_to_cpu(id->hmpre);
ctrl->hmmin = le32_to_cpu(id->hmmin);
ctrl->hmminds = le32_to_cpu(id->hmminds);
ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
}

kfree(id);
Expand Down Expand Up @@ -2377,10 +2379,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)

nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid);

if (nvme_nvm_ns_supported(ns, id) &&
nvme_nvm_register(ns, disk_name, node)) {
dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
goto out_free_id;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
if (nvme_nvm_register(ns, disk_name, node)) {
dev_warn(ctrl->device, "LightNVM init failure\n");
goto out_free_id;
}
}

disk = alloc_disk_node(0, node);
Expand Down
26 changes: 0 additions & 26 deletions drivers/nvme/host/lightnvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -955,29 +955,3 @@ void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
&nvm_dev_attr_group);
}

/* move to shared place when used in multiple places. */
#define PCI_VENDOR_ID_CNEX 0x1d1d
#define PCI_DEVICE_ID_CNEX_WL 0x2807
#define PCI_DEVICE_ID_CNEX_QEMU 0x1f1f

int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
struct nvme_ctrl *ctrl = ns->ctrl;
/* XXX: this is poking into PCI structures from generic code! */
struct pci_dev *pdev = to_pci_dev(ctrl->dev);

/* QEMU NVMe simulator - PCI ID + Vendor specific bit */
if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
pdev->device == PCI_DEVICE_ID_CNEX_QEMU &&
id->vs[0] == 0x1)
return 1;

/* CNEX Labs - PCI ID + Vendor specific bit */
if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
pdev->device == PCI_DEVICE_ID_CNEX_WL &&
id->vs[0] == 0x1)
return 1;

return 0;
}
13 changes: 8 additions & 5 deletions drivers/nvme/host/nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ enum nvme_quirks {
* The deepest sleep state should not be used.
*/
NVME_QUIRK_NO_DEEPEST_PS = (1 << 5),

/*
* Supports the LighNVM command set if indicated in vs[1].
*/
NVME_QUIRK_LIGHTNVM = (1 << 6),
};

/*
Expand Down Expand Up @@ -176,8 +181,11 @@ struct nvme_ctrl {
u64 ps_max_latency_us;
bool apst_enabled;

/* PCIe only: */
u32 hmpre;
u32 hmmin;
u32 hmminds;
u16 hmmaxd;

/* Fabrics only */
u16 sqsize;
Expand Down Expand Up @@ -320,7 +328,6 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);

#ifdef CONFIG_NVM
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
int nvme_nvm_register_sysfs(struct nvme_ns *ns);
Expand All @@ -339,10 +346,6 @@ static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns)
return 0;
}
static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {};
static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
return 0;
}
static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
unsigned long arg)
{
Expand Down
74 changes: 50 additions & 24 deletions drivers/nvme/host/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -1612,21 +1612,23 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
dev->host_mem_descs = NULL;
}

static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
u32 chunk_size)
{
struct nvme_host_mem_buf_desc *descs;
u32 chunk_size, max_entries, len;
u32 max_entries, len;
dma_addr_t descs_dma;
int i = 0;
void **bufs;
u64 size = 0, tmp;

/* start big and work our way down */
chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
retry:
tmp = (preferred + chunk_size - 1);
do_div(tmp, chunk_size);
max_entries = tmp;

if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
max_entries = dev->ctrl.hmmaxd;

descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs),
&descs_dma, GFP_KERNEL);
if (!descs)
Expand All @@ -1650,15 +1652,9 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
i++;
}

if (!size || (min && size < min)) {
dev_warn(dev->ctrl.device,
"failed to allocate host memory buffer.\n");
if (!size)
goto out_free_bufs;
}

dev_info(dev->ctrl.device,
"allocated %lld MiB host memory buffer.\n",
size >> ilog2(SZ_1M));
dev->nr_host_mem_descs = i;
dev->host_mem_size = size;
dev->host_mem_descs = descs;
Expand All @@ -1679,29 +1675,43 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
descs_dma);
out:
/* try a smaller chunk size if we failed early */
if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
chunk_size /= 2;
goto retry;
}
dev->host_mem_descs = NULL;
return -ENOMEM;
}

static void nvme_setup_host_mem(struct nvme_dev *dev)
static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
{
u32 chunk_size;

/* start big and work our way down */
for (chunk_size = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES);
chunk_size >= max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
chunk_size /= 2) {
if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
if (!min || dev->host_mem_size >= min)
return 0;
nvme_free_host_mem(dev);
}
}

return -ENOMEM;
}

static int nvme_setup_host_mem(struct nvme_dev *dev)
{
u64 max = (u64)max_host_mem_size_mb * SZ_1M;
u64 preferred = (u64)dev->ctrl.hmpre * 4096;
u64 min = (u64)dev->ctrl.hmmin * 4096;
u32 enable_bits = NVME_HOST_MEM_ENABLE;
int ret = 0;

preferred = min(preferred, max);
if (min > max) {
dev_warn(dev->ctrl.device,
"min host memory (%lld MiB) above limit (%d MiB).\n",
min >> ilog2(SZ_1M), max_host_mem_size_mb);
nvme_free_host_mem(dev);
return;
return 0;
}

/*
Expand All @@ -1715,12 +1725,21 @@ static void nvme_setup_host_mem(struct nvme_dev *dev)
}

if (!dev->host_mem_descs) {
if (nvme_alloc_host_mem(dev, min, preferred))
return;
if (nvme_alloc_host_mem(dev, min, preferred)) {
dev_warn(dev->ctrl.device,
"failed to allocate host memory buffer.\n");
return 0; /* controller must work without HMB */
}

dev_info(dev->ctrl.device,
"allocated %lld MiB host memory buffer.\n",
dev->host_mem_size >> ilog2(SZ_1M));
}

if (nvme_set_host_mem(dev, enable_bits))
ret = nvme_set_host_mem(dev, enable_bits);
if (ret)
nvme_free_host_mem(dev);
return ret;
}

static int nvme_setup_io_queues(struct nvme_dev *dev)
Expand Down Expand Up @@ -2164,8 +2183,11 @@ static void nvme_reset_work(struct work_struct *work)
"unable to allocate dma for dbbuf\n");
}

if (dev->ctrl.hmpre)
nvme_setup_host_mem(dev);
if (dev->ctrl.hmpre) {
result = nvme_setup_host_mem(dev);
if (result < 0)
goto out;
}

result = nvme_setup_io_queues(dev);
if (result)
Expand Down Expand Up @@ -2497,6 +2519,10 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
Expand Down
4 changes: 3 additions & 1 deletion include/linux/nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,9 @@ struct nvme_id_ctrl {
__le16 mntmt;
__le16 mxtmt;
__le32 sanicap;
__u8 rsvd332[180];
__le32 hmminds;
__le16 hmmaxd;
__u8 rsvd338[174];
__u8 sqes;
__u8 cqes;
__le16 maxcmd;
Expand Down
Loading

0 comments on commit 80a0d64

Please sign in to comment.