Skip to content

Commit

Permalink
Merge branch 'for-4.4/core' of git://git.kernel.dk/linux-block
Browse files Browse the repository at this point in the history
Pull core block updates from Jens Axboe:
 "This is the core block pull request for 4.4.  I've got a few more
  topic branches this time around, some of them will layer on top of the
  core+drivers changes and will come in a separate round.  So not a huge
  chunk of changes in this round.

  This pull request contains:

   - Enable blk-mq page allocation tracking with kmemleak, from Catalin.

   - Unused prototype removal in blk-mq from Christoph.

   - Cleanup of the q->blk_trace exchange, using cmpxchg instead of two
     xchg()'s, from Davidlohr.

   - A plug flush fix from Jeff.

   - Also from Jeff, a fix that means we don't have to update shared tag
     sets at init time unless we do a state change.  This cuts down boot
     times on thousands of devices a lot with scsi/blk-mq.

   - blk-mq waitqueue barrier fix from Kosuke.

   - Various fixes from Ming:

        - Fixes for segment merging and splitting, and checks, for
          the old core and blk-mq.

        - Potential blk-mq speedup by marking ctx pending at the end
          of a plug insertion batch in blk-mq.

        - direct-io no page dirty on kernel direct reads.

   - A WRITE_SYNC fix for mpage from Roman"

* 'for-4.4/core' of git://git.kernel.dk/linux-block:
  blk-mq: avoid excessive boot delays with large lun counts
  blktrace: re-write setting q->blk_trace
  blk-mq: mark ctx as pending at batch in flush plug path
  blk-mq: fix for trace_block_plug()
  block: check bio_mergeable() early before merging
  blk-mq: check bio_mergeable() early before merging
  block: avoid to merge splitted bio
  block: setup bi_phys_segments after splitting
  block: fix plug list flushing for nomerge queues
  blk-mq: remove unused blk_mq_clone_flush_request prototype
  blk-mq: fix waitqueue_active without memory barrier in block/blk-mq-tag.c
  fs: direct-io: don't dirtying pages for ITER_BVEC/ITER_KVEC direct read
  fs/mpage.c: forgotten WRITE_SYNC in case of data integrity write
  block: kmemleak: Track the page allocations for struct request
  • Loading branch information
torvalds committed Nov 5, 2015
2 parents 0d51ce9 + 2404e60 commit d9734e0
Show file tree
Hide file tree
Showing 10 changed files with 148 additions and 62 deletions.
32 changes: 29 additions & 3 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1594,6 +1594,30 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
return ret;
}

unsigned int blk_plug_queued_count(struct request_queue *q)
{
struct blk_plug *plug;
struct request *rq;
struct list_head *plug_list;
unsigned int ret = 0;

plug = current->plug;
if (!plug)
goto out;

if (q->mq_ops)
plug_list = &plug->mq_list;
else
plug_list = &plug->list;

list_for_each_entry(rq, plug_list, queuelist) {
if (rq->q == q)
ret++;
}
out:
return ret;
}

void init_request_from_bio(struct request *req, struct bio *bio)
{
req->cmd_type = REQ_TYPE_FS;
Expand Down Expand Up @@ -1641,9 +1665,11 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio)
* Check if we can merge with the plugged list before grabbing
* any locks.
*/
if (!blk_queue_nomerges(q) &&
blk_attempt_plug_merge(q, bio, &request_count, NULL))
return;
if (!blk_queue_nomerges(q)) {
if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
return;
} else
request_count = blk_plug_queued_count(q);

spin_lock_irq(q->queue_lock);

Expand Down
32 changes: 25 additions & 7 deletions block/blk-merge.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,16 @@

static struct bio *blk_bio_discard_split(struct request_queue *q,
struct bio *bio,
struct bio_set *bs)
struct bio_set *bs,
unsigned *nsegs)
{
unsigned int max_discard_sectors, granularity;
int alignment;
sector_t tmp;
unsigned split_sectors;

*nsegs = 1;

/* Zero-sector (unknown) and one-sector granularities are the same. */
granularity = max(q->limits.discard_granularity >> 9, 1U);

Expand Down Expand Up @@ -51,8 +54,11 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,

static struct bio *blk_bio_write_same_split(struct request_queue *q,
struct bio *bio,
struct bio_set *bs)
struct bio_set *bs,
unsigned *nsegs)
{
*nsegs = 1;

if (!q->limits.max_write_same_sectors)
return NULL;

Expand All @@ -64,7 +70,8 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,

static struct bio *blk_bio_segment_split(struct request_queue *q,
struct bio *bio,
struct bio_set *bs)
struct bio_set *bs,
unsigned *segs)
{
struct bio_vec bv, bvprv, *bvprvp = NULL;
struct bvec_iter iter;
Expand Down Expand Up @@ -106,24 +113,35 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
sectors += bv.bv_len >> 9;
}

*segs = nsegs;
return NULL;
split:
*segs = nsegs;
return bio_split(bio, sectors, GFP_NOIO, bs);
}

void blk_queue_split(struct request_queue *q, struct bio **bio,
struct bio_set *bs)
{
struct bio *split;
struct bio *split, *res;
unsigned nsegs;

if ((*bio)->bi_rw & REQ_DISCARD)
split = blk_bio_discard_split(q, *bio, bs);
split = blk_bio_discard_split(q, *bio, bs, &nsegs);
else if ((*bio)->bi_rw & REQ_WRITE_SAME)
split = blk_bio_write_same_split(q, *bio, bs);
split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
else
split = blk_bio_segment_split(q, *bio, q->bio_split);
split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);

/* physical segments can be figured out during splitting */
res = split ? split : *bio;
res->bi_phys_segments = nsegs;
bio_set_flag(res, BIO_SEG_VALID);

if (split) {
/* there isn't chance to merge the splitted bio */
split->bi_rw |= REQ_NOMERGE;

bio_chain(split, *bio);
generic_make_request(*bio);
*bio = split;
Expand Down
4 changes: 4 additions & 0 deletions block/blk-mq-tag.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
struct blk_mq_bitmap_tags *bt;
int i, wake_index;

/*
* Make sure all changes prior to this are visible from other CPUs.
*/
smp_mb();
bt = &tags->bitmap_tags;
wake_index = atomic_read(&bt->wake_index);
for (i = 0; i < BT_WAIT_QUEUES; i++) {
Expand Down
89 changes: 62 additions & 27 deletions block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/kmemleak.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/slab.h>
Expand Down Expand Up @@ -989,18 +990,25 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
}
EXPORT_SYMBOL(blk_mq_delay_queue);

static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
struct request *rq, bool at_head)
static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx,
struct request *rq,
bool at_head)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;

trace_block_rq_insert(hctx->queue, rq);

if (at_head)
list_add(&rq->queuelist, &ctx->rq_list);
else
list_add_tail(&rq->queuelist, &ctx->rq_list);
}

static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
struct request *rq, bool at_head)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;

__blk_mq_insert_req_list(hctx, ctx, rq, at_head);
blk_mq_hctx_mark_pending(hctx, ctx);
}

Expand Down Expand Up @@ -1056,8 +1064,9 @@ static void blk_mq_insert_requests(struct request_queue *q,
rq = list_first_entry(list, struct request, queuelist);
list_del_init(&rq->queuelist);
rq->mq_ctx = ctx;
__blk_mq_insert_request(hctx, rq, false);
__blk_mq_insert_req_list(hctx, ctx, rq, false);
}
blk_mq_hctx_mark_pending(hctx, ctx);
spin_unlock(&ctx->lock);

blk_mq_run_hw_queue(hctx, from_schedule);
Expand Down Expand Up @@ -1139,7 +1148,7 @@ static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx,
struct request *rq, struct bio *bio)
{
if (!hctx_allow_merges(hctx)) {
if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) {
blk_mq_bio_to_request(rq, bio);
spin_lock(&ctx->lock);
insert_rq:
Expand Down Expand Up @@ -1267,9 +1276,12 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)

blk_queue_split(q, &bio, q->bio_split);

if (!is_flush_fua && !blk_queue_nomerges(q) &&
blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
return;
if (!is_flush_fua && !blk_queue_nomerges(q)) {
if (blk_attempt_plug_merge(q, bio, &request_count,
&same_queue_rq))
return;
} else
request_count = blk_plug_queued_count(q);

rq = blk_mq_map_request(q, bio, &data);
if (unlikely(!rq))
Expand Down Expand Up @@ -1376,7 +1388,7 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
plug = current->plug;
if (plug) {
blk_mq_bio_to_request(rq, bio);
if (list_empty(&plug->mq_list))
if (!request_count)
trace_block_plug(q);
else if (request_count >= BLK_MAX_REQUEST_COUNT) {
blk_flush_plug_list(plug, false);
Expand Down Expand Up @@ -1430,6 +1442,11 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
while (!list_empty(&tags->page_list)) {
page = list_first_entry(&tags->page_list, struct page, lru);
list_del_init(&page->lru);
/*
* Remove kmemleak object previously allocated in
* blk_mq_init_rq_map().
*/
kmemleak_free(page_address(page));
__free_pages(page, page->private);
}

Expand Down Expand Up @@ -1502,6 +1519,11 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
list_add_tail(&page->lru, &tags->page_list);

p = page_address(page);
/*
* Allow kmemleak to scan these pages as they contain pointers
* to additional allocations like via ops->init_request().
*/
kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL);
entries_per_page = order_to_size(this_order) / rq_size;
to_do = min(entries_per_page, set->queue_depth - i);
left -= to_do * rq_size;
Expand Down Expand Up @@ -1673,7 +1695,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
INIT_LIST_HEAD(&hctx->dispatch);
hctx->queue = q;
hctx->queue_num = hctx_idx;
hctx->flags = set->flags;
hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;

blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
blk_mq_hctx_notify, hctx);
Expand Down Expand Up @@ -1860,27 +1882,26 @@ static void blk_mq_map_swqueue(struct request_queue *q,
}
}

static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set)
static void queue_set_hctx_shared(struct request_queue *q, bool shared)
{
struct blk_mq_hw_ctx *hctx;
struct request_queue *q;
bool shared;
int i;

if (set->tag_list.next == set->tag_list.prev)
shared = false;
else
shared = true;
queue_for_each_hw_ctx(q, hctx, i) {
if (shared)
hctx->flags |= BLK_MQ_F_TAG_SHARED;
else
hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
}
}

static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
{
struct request_queue *q;

list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_freeze_queue(q);

queue_for_each_hw_ctx(q, hctx, i) {
if (shared)
hctx->flags |= BLK_MQ_F_TAG_SHARED;
else
hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
}
queue_set_hctx_shared(q, shared);
blk_mq_unfreeze_queue(q);
}
}
Expand All @@ -1891,7 +1912,12 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)

mutex_lock(&set->tag_list_lock);
list_del_init(&q->tag_set_list);
blk_mq_update_tag_set_depth(set);
if (list_is_singular(&set->tag_list)) {
/* just transitioned to unshared */
set->flags &= ~BLK_MQ_F_TAG_SHARED;
/* update existing queue */
blk_mq_update_tag_set_depth(set, false);
}
mutex_unlock(&set->tag_list_lock);
}

Expand All @@ -1901,8 +1927,17 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
q->tag_set = set;

mutex_lock(&set->tag_list_lock);

/* Check to see if we're transitioning to shared (from 1 to 2 queues). */
if (!list_empty(&set->tag_list) && !(set->flags & BLK_MQ_F_TAG_SHARED)) {
set->flags |= BLK_MQ_F_TAG_SHARED;
/* update existing queue */
blk_mq_update_tag_set_depth(set, true);
}
if (set->flags & BLK_MQ_F_TAG_SHARED)
queue_set_hctx_shared(q, true);
list_add_tail(&q->tag_set_list, &set->tag_list);
blk_mq_update_tag_set_depth(set);

mutex_unlock(&set->tag_list_lock);
}

Expand Down
2 changes: 0 additions & 2 deletions block/blk-mq.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ void __blk_mq_complete_request(struct request *rq);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_free_queue(struct request_queue *q);
void blk_mq_clone_flush_request(struct request *flush_rq,
struct request *orig_rq);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);

Expand Down
1 change: 1 addition & 0 deletions block/blk.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int *request_count,
struct request **same_queue_rq);
unsigned int blk_plug_queued_count(struct request_queue *q);

void blk_account_io_start(struct request *req, bool new_io);
void blk_account_io_completion(struct request *req, unsigned int bytes);
Expand Down
2 changes: 1 addition & 1 deletion block/elevator.c
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
* noxmerges: Only simple one-hit cache try
* merges: All merge tries attempted
*/
if (blk_queue_nomerges(q))
if (blk_queue_nomerges(q) || !bio_mergeable(bio))
return ELEVATOR_NO_MERGE;

/*
Expand Down
9 changes: 6 additions & 3 deletions fs/direct-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ struct dio {
int page_errors; /* errno from get_user_pages() */
int is_async; /* is IO async ? */
bool defer_completion; /* defer AIO completion to workqueue? */
bool should_dirty; /* if pages should be dirtied */
int io_error; /* IO error in completion path */
unsigned long refcount; /* direct_io_worker() and bios */
struct bio *bio_list; /* singly linked via bi_private */
Expand Down Expand Up @@ -393,7 +394,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
dio->refcount++;
spin_unlock_irqrestore(&dio->bio_lock, flags);

if (dio->is_async && dio->rw == READ)
if (dio->is_async && dio->rw == READ && dio->should_dirty)
bio_set_pages_dirty(bio);

if (sdio->submit_io)
Expand Down Expand Up @@ -464,14 +465,15 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
if (bio->bi_error)
dio->io_error = -EIO;

if (dio->is_async && dio->rw == READ) {
if (dio->is_async && dio->rw == READ && dio->should_dirty) {
bio_check_pages_dirty(bio); /* transfers ownership */
err = bio->bi_error;
} else {
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;

if (dio->rw == READ && !PageCompound(page))
if (dio->rw == READ && !PageCompound(page) &&
dio->should_dirty)
set_page_dirty_lock(page);
page_cache_release(page);
}
Expand Down Expand Up @@ -1219,6 +1221,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
spin_lock_init(&dio->bio_lock);
dio->refcount = 1;

dio->should_dirty = (iter->type == ITER_IOVEC);
sdio.iter = iter;
sdio.final_block_in_request =
(offset + iov_iter_count(iter)) >> blkbits;
Expand Down
Loading

0 comments on commit d9734e0

Please sign in to comment.