Merge branch 'for-4.16/block' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
 "This is the main pull request for block IO related changes for the
  4.16 kernel. Nothing major in this pull request, but a good amount of
  improvements and fixes all over the map. This contains:

   - BFQ improvements, fixes, and cleanups from Angelo, Chiara, and
     Paolo.

   - Support for SMR zones for deadline and mq-deadline from Damien and
     Christoph.

   - Set of fixes for bcache by way of Michael Lyle, including fixes
     from himself, Kent, Rui, Tang, and Coly.

   - Series from Matias for lightnvm with fixes from Hans Holmberg,
     Javier, and Matias. Mostly centered around pblk, and removing the
     rrpc 1.2 code in preparation for supporting 2.0.

   - A couple of NVMe pull requests from Christoph. Nothing major in
     here, just fixes and cleanups, and support for command tracing from
     Johannes.

   - Support in blk-throttle for tracking reads and writes separately.
     From Joseph Qi. A few cleanups/fixes also for blk-throttle from
     Weiping.

   - Series from Mike Snitzer that enables dm to register its queue more
     logically, something that's always been problematic on dm since
     it's a stacked device.

   - Series from Ming cleaning up some of the bio accessor use, in
     preparation for supporting multipage bvecs.

   - Various fixes from Ming closing up holes around queue mapping and
     quiescing.

   - BSD partition fix from Richard Narron, fixing a problem where we
     can't mount newer (10/11) FreeBSD partitions.

   - Series from Tejun reworking blk-mq timeout handling. The previous
     scheme relied on atomic bits, but it had races where we would think
     a request had timed out if it was reused at the wrong time.

   - null_blk now supports faking timeouts, to enable us to better
     exercise and test that functionality separately. From me.

   - Kill the separate atomic poll bit in the request struct. After
     this, we don't use the atomic bits on blk-mq anymore at all. From
     me.

   - sgl_alloc/free helpers from Bart (a usage sketch follows this
     message).

   - Heavily contended tag case scalability improvement from me.

   - Various little fixes and cleanups from Arnd, Bart, Corentin,
     Douglas, Eryu, Goldwyn, and myself"
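
As a quick illustration of the new sgl_alloc/free helpers (a hedged sketch
written against the lib/scatterlist.c additions in this pull, not code taken
from the commit itself):

	#include <linux/gfp.h>
	#include <linux/scatterlist.h>
	#include <linux/sizes.h>

	/* Sketch: allocate a scatterlist covering 1 MiB, one page per entry. */
	static int sgl_demo(void)
	{
		unsigned int nents;
		struct scatterlist *sgl;

		sgl = sgl_alloc(SZ_1M, GFP_KERNEL, &nents);
		if (!sgl)
			return -ENOMEM;

		/* ... dma_map_sg() the entries or copy data through them ... */

		sgl_free(sgl);	/* frees the pages and the scatterlist itself */
		return 0;
	}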

* 'for-4.16/block' of git://git.kernel.dk/linux-block: (186 commits)
  block: remove smart1,2.h
  nvme: add tracepoint for nvme_complete_rq
  nvme: add tracepoint for nvme_setup_cmd
  nvme-pci: introduce RECONNECTING state to mark initializing procedure
  nvme-rdma: remove redundant boolean for inline_data
  nvme: don't free uuid pointer before printing it
  nvme-pci: Suspend queues after deleting them
  bsg: use pr_debug instead of hand crafted macros
  blk-mq-debugfs: don't allow write on attributes with seq_operations set
  nvme-pci: Fix queue double allocations
  block: Set BIO_TRACE_COMPLETION on new bio during split
  blk-throttle: use queue_is_rq_based
  block: Remove kblockd_schedule_delayed_work{,_on}()
  blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended delays
  blk-mq: Rename blk_mq_request_direct_issue() into blk_mq_request_issue_directly()
  lib/scatterlist: Fix chaining support in sgl_alloc_order()
  blk-throttle: track read and write request individually
  block: add bdev_read_only() checks to common helpers
  block: fail op_is_write() requests to read-only partitions
  blk-throttle: export io_serviced_recursive, io_service_bytes_recursive
  ...
torvalds committed Jan 29, 2018
2 parents 9697e9d + 796baee commit 0a4b6e2
Showing 124 changed files with 3,884 additions and 4,729 deletions.
7 changes: 5 additions & 2 deletions block/bfq-cgroup.c
@@ -775,10 +775,11 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
 	unsigned long flags;
 	int i;
 
+	spin_lock_irqsave(&bfqd->lock, flags);
+
 	if (!entity) /* root group */
-		return;
+		goto put_async_queues;
 
-	spin_lock_irqsave(&bfqd->lock, flags);
 	/*
 	 * Empty all service_trees belonging to this group before
 	 * deactivating the group itself.
@@ -809,6 +810,8 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
 	}
 
 	__bfq_deactivate_entity(entity, false);
+
+put_async_queues:
 	bfq_put_async_queues(bfqd, bfqg);
 
 	spin_unlock_irqrestore(&bfqd->lock, flags);
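
With both hunks applied, the lock is taken before the root-group check, so
bfq_put_async_queues() now always runs under bfqd->lock. A condensed sketch of
the resulting shape of bfq_pd_offline(), reconstructed from the context above
(declarations and the service-tree loop elided):

	spin_lock_irqsave(&bfqd->lock, flags);

	if (!entity)	/* root group: no entities to deactivate */
		goto put_async_queues;

	/* empty all service_trees of the group, then deactivate its entity */
	__bfq_deactivate_entity(entity, false);

put_async_queues:
	bfq_put_async_queues(bfqd, bfqg);

	spin_unlock_irqrestore(&bfqd->lock, flags);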
529 changes: 374 additions & 155 deletions block/bfq-iosched.c

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions block/bfq-iosched.h
@@ -337,13 +337,20 @@ struct bfq_queue {
 	 * last transition from idle to backlogged.
 	 */
 	unsigned long service_from_backlogged;
+	/*
+	 * Cumulative service received from the @bfq_queue since its
+	 * last transition to weight-raised state.
+	 */
+	unsigned long service_from_wr;
 
 	/*
 	 * Value of wr start time when switching to soft rt
 	 */
 	unsigned long wr_start_at_switch_to_srt;
 
 	unsigned long split_time; /* time of last split */
+
+	unsigned long first_IO_time; /* time of first I/O for this queue */
 };
 
 /**
@@ -627,6 +634,18 @@ struct bfq_data {
 	struct bfq_io_cq *bio_bic;
 	/* bfqq associated with the task issuing current bio for merging */
 	struct bfq_queue *bio_bfqq;
+
+	/*
+	 * Cached sbitmap shift, used to compute depth limits in
+	 * bfq_update_depths.
+	 */
+	unsigned int sb_shift;
+
+	/*
+	 * Depth limits used in bfq_limit_depth (see comments on the
+	 * function)
+	 */
+	unsigned int word_depths[2][2];
 };
 
 enum bfqq_state_flags {
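
The two new bfq_data fields are consumed by bfq_limit_depth() in
bfq-iosched.c, whose large diff is collapsed above. A hedged sketch of how
bfq_update_depths() plausibly fills them, reconstructed for illustration
rather than quoted from this page (indices are [weight-raised?][sync?]):

	/* Assumed shape; the authoritative version is in the collapsed diff. */
	static void bfq_update_depths(struct bfq_data *bfqd,
				      struct sbitmap_queue *bt)
	{
		bfqd->sb_shift = bt->sb.shift;

		/* no queue weight-raised: ~50% of tags for async, ~75% for sync writes */
		bfqd->word_depths[0][0] = max((1U << bfqd->sb_shift) >> 1, 1U);
		bfqd->word_depths[0][1] = max(((1U << bfqd->sb_shift) * 3) >> 2, 1U);

		/* some queue weight-raised: tighter caps, ~18% and ~37% */
		bfqd->word_depths[1][0] = max(((1U << bfqd->sb_shift) * 3) >> 4, 1U);
		bfqd->word_depths[1][1] = max(((1U << bfqd->sb_shift) * 6) >> 4, 1U);
	}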
7 changes: 7 additions & 0 deletions block/bfq-wf2q.c
@@ -835,6 +835,13 @@ void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
 	struct bfq_entity *entity = &bfqq->entity;
 	struct bfq_service_tree *st;
 
+	if (!bfqq->service_from_backlogged)
+		bfqq->first_IO_time = jiffies;
+
+	if (bfqq->wr_coeff > 1)
+		bfqq->service_from_wr += served;
+
+	bfqq->service_from_backlogged += served;
 	for_each_entity(entity) {
 		st = bfq_entity_service_tree(entity);
 
1 change: 0 additions & 1 deletion block/bio-integrity.c
@@ -374,7 +374,6 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 /**
  * __bio_integrity_endio - Integrity I/O completion function
  * @bio:	Protected bio
- * @error:	Pointer to errno
  *
  * Description: Completion for integrity I/O
  *
30 changes: 1 addition & 29 deletions block/bio.c
@@ -970,34 +970,6 @@ void bio_advance(struct bio *bio, unsigned bytes)
 }
 EXPORT_SYMBOL(bio_advance);
 
-/**
- * bio_alloc_pages - allocates a single page for each bvec in a bio
- * @bio: bio to allocate pages for
- * @gfp_mask: flags for allocation
- *
- * Allocates pages up to @bio->bi_vcnt.
- *
- * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are
- * freed.
- */
-int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
-{
-	int i;
-	struct bio_vec *bv;
-
-	bio_for_each_segment_all(bv, bio, i) {
-		bv->bv_page = alloc_page(gfp_mask);
-		if (!bv->bv_page) {
-			while (--bv >= bio->bi_io_vec)
-				__free_page(bv->bv_page);
-			return -ENOMEM;
-		}
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(bio_alloc_pages);
-
 /**
  * bio_copy_data - copy contents of data buffers from one chain of bios to
  *	another
@@ -1838,7 +1810,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
 	bio_advance(bio, split->bi_iter.bi_size);
 
 	if (bio_flagged(bio, BIO_TRACE_COMPLETION))
-		bio_set_flag(bio, BIO_TRACE_COMPLETION);
+		bio_set_flag(split, BIO_TRACE_COMPLETION);
 
 	return split;
 }
87 changes: 52 additions & 35 deletions block/blk-core.c
@@ -126,6 +126,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	rq->start_time = jiffies;
 	set_start_time_ns(rq);
 	rq->part = NULL;
+	seqcount_init(&rq->gstate_seq);
+	u64_stats_init(&rq->aborted_gstate_sync);
 }
 EXPORT_SYMBOL(blk_rq_init);
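
gstate_seq and aborted_gstate_sync back Tejun's generation-based timeout
rework mentioned in the pull message. As a hedged fragment (the consuming
code lives in blk-mq.c, not in this hunk; rq->gstate is the generation field
added by that rework), the timeout path samples the state with the standard
seqcount retry loop, so a request being recycled concurrently cannot be
mistaken for a timed-out one:

	u64 gstate;
	unsigned int start;

	do {
		start = read_seqcount_begin(&rq->gstate_seq);
		gstate = rq->gstate;	/* generation number + state bits */
	} while (read_seqcount_retry(&rq->gstate_seq, start));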

@@ -699,6 +701,15 @@ void blk_cleanup_queue(struct request_queue *q)
 	queue_flag_set(QUEUE_FLAG_DEAD, q);
 	spin_unlock_irq(lock);
 
+	/*
+	 * make sure all in-progress dispatch are completed because
+	 * blk_freeze_queue() can only complete all requests, and
+	 * dispatch may still be in-progress since we dispatch requests
+	 * from more than one contexts
+	 */
+	if (q->mq_ops)
+		blk_mq_quiesce_queue(q);
+
 	/* for synchronous bio-based driver finish in-flight integrity i/o */
 	blk_flush_integrity();
 
@@ -1646,6 +1657,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 
 	lockdep_assert_held(q->queue_lock);
 
+	blk_req_zone_write_unlock(req);
 	blk_pm_put_request(req);
 
 	elv_completed_request(q, req);
@@ -2055,6 +2067,21 @@ static inline bool should_fail_request(struct hd_struct *part,
 
 #endif /* CONFIG_FAIL_MAKE_REQUEST */
 
+static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
+{
+	if (part->policy && op_is_write(bio_op(bio))) {
+		char b[BDEVNAME_SIZE];
+
+		printk(KERN_ERR
+		       "generic_make_request: Trying to write "
+		       "to read-only block-device %s (partno %d)\n",
+		       bio_devname(bio, b), part->partno);
+		return true;
+	}
+
+	return false;
+}
+
 /*
  * Remap block n of partition p to block n+start(p) of the disk.
  */
@@ -2063,27 +2090,28 @@ static inline int blk_partition_remap(struct bio *bio)
 	struct hd_struct *p;
 	int ret = 0;
 
+	rcu_read_lock();
+	p = __disk_get_part(bio->bi_disk, bio->bi_partno);
+	if (unlikely(!p || should_fail_request(p, bio->bi_iter.bi_size) ||
+		     bio_check_ro(bio, p))) {
+		ret = -EIO;
+		goto out;
+	}
+
 	/*
 	 * Zone reset does not include bi_size so bio_sectors() is always 0.
 	 * Include a test for the reset op code and perform the remap if needed.
 	 */
-	if (!bio->bi_partno ||
-	    (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET))
-		return 0;
+	if (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET)
+		goto out;
 
-	rcu_read_lock();
-	p = __disk_get_part(bio->bi_disk, bio->bi_partno);
-	if (likely(p && !should_fail_request(p, bio->bi_iter.bi_size))) {
-		bio->bi_iter.bi_sector += p->start_sect;
-		bio->bi_partno = 0;
-		trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
-				      bio->bi_iter.bi_sector - p->start_sect);
-	} else {
-		printk("%s: fail for partition %d\n", __func__, bio->bi_partno);
-		ret = -EIO;
-	}
-	rcu_read_unlock();
+	bio->bi_iter.bi_sector += p->start_sect;
+	bio->bi_partno = 0;
+	trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
+			      bio->bi_iter.bi_sector - p->start_sect);
 
+out:
+	rcu_read_unlock();
 	return ret;
 }

@@ -2142,15 +2170,19 @@ generic_make_request_checks(struct bio *bio)
 	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
 	 * if queue is not a request based queue.
 	 */
-
 	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
 		goto not_supported;
 
 	if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
 		goto end_io;
 
-	if (blk_partition_remap(bio))
-		goto end_io;
+	if (!bio->bi_partno) {
+		if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
+			goto end_io;
+	} else {
+		if (blk_partition_remap(bio))
+			goto end_io;
+	}
 
 	if (bio_check_eod(bio, nr_sectors))
 		goto end_io;
@@ -2493,8 +2525,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 		 * bypass a potential scheduler on the bottom device for
 		 * insert.
 		 */
-		blk_mq_request_bypass_insert(rq, true);
-		return BLK_STS_OK;
+		return blk_mq_request_issue_directly(rq);
 	}
 
 	spin_lock_irqsave(q->queue_lock, flags);
@@ -2846,7 +2877,7 @@ void blk_start_request(struct request *req)
 		wbt_issue(req->q->rq_wb, &req->issue_stat);
 	}
 
-	BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
+	BUG_ON(blk_rq_is_complete(req));
 	blk_add_timer(req);
 }
 EXPORT_SYMBOL(blk_start_request);
@@ -3415,20 +3446,6 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
 }
 EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
 
-int kblockd_schedule_delayed_work(struct delayed_work *dwork,
-				  unsigned long delay)
-{
-	return queue_delayed_work(kblockd_workqueue, dwork, delay);
-}
-EXPORT_SYMBOL(kblockd_schedule_delayed_work);
-
-int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
-				     unsigned long delay)
-{
-	return queue_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
-}
-EXPORT_SYMBOL(kblockd_schedule_delayed_work_on);
-
 /**
  * blk_start_plug - initialize blk_plug and track it inside the task_struct
  * @plug: The &struct blk_plug that needs to be initialized
2 changes: 1 addition & 1 deletion block/blk-exec.c
@@ -61,7 +61,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	 * be reused after dying flag is set
 	 */
 	if (q->mq_ops) {
-		blk_mq_sched_insert_request(rq, at_head, true, false, false);
+		blk_mq_sched_insert_request(rq, at_head, true, false);
 		return;
 	}
 
12 changes: 12 additions & 0 deletions block/blk-lib.c
@@ -37,6 +37,9 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	if (!q)
 		return -ENXIO;
 
+	if (bdev_read_only(bdev))
+		return -EPERM;
+
 	if (flags & BLKDEV_DISCARD_SECURE) {
 		if (!blk_queue_secure_erase(q))
 			return -EOPNOTSUPP;
@@ -156,6 +159,9 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 	if (!q)
 		return -ENXIO;
 
+	if (bdev_read_only(bdev))
+		return -EPERM;
+
 	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
 	if ((sector | nr_sects) & bs_mask)
 		return -EINVAL;
@@ -233,6 +239,9 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
 	if (!q)
 		return -ENXIO;
 
+	if (bdev_read_only(bdev))
+		return -EPERM;
+
 	/* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
 	max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);
 
@@ -287,6 +296,9 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev,
 	if (!q)
 		return -ENXIO;
 
+	if (bdev_read_only(bdev))
+		return -EPERM;
+
 	while (nr_sects != 0) {
 		bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
 				gfp_mask);
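
The user-visible effect of these checks, as a hedged userspace illustration
(this program is not part of the commit; /dev/sdX is a placeholder): a discard
issued through a descriptor opened read-write before the device was marked
read-only now fails with EPERM instead of destroying data:

	#include <fcntl.h>
	#include <linux/fs.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/ioctl.h>

	int main(void)
	{
		int ro = 1;
		uint64_t range[2] = { 0, 1 << 20 };	/* offset, length */
		int fd = open("/dev/sdX", O_RDWR);	/* placeholder device */

		if (fd < 0)
			return 1;
		ioctl(fd, BLKROSET, &ro);		/* mark device read-only */
		if (ioctl(fd, BLKDISCARD, &range) < 0)
			perror("BLKDISCARD");		/* expected: EPERM */
		return 0;
	}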
4 changes: 2 additions & 2 deletions block/blk-map.c
@@ -119,7 +119,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
 	struct bio *bio = NULL;
 	struct iov_iter i;
-	int ret;
+	int ret = -EINVAL;
 
 	if (!iter_is_iovec(iter))
 		goto fail;
@@ -148,7 +148,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 		__blk_rq_unmap_user(bio);
 fail:
 	rq->bio = NULL;
-	return -EINVAL;
+	return ret;
 }
 EXPORT_SYMBOL(blk_rq_map_user_iov);
 