Merge branch 'for-3.19/core' of git://git.kernel.dk/linux-block
Pull block driver core update from Jens Axboe:
 "This is the pull request for the core block IO changes for 3.19.  Not
  a huge round this time, mostly lots of little good fixes:

   - Fix a bug in the sysfs blktrace interface causing a NULL pointer
     dereference when enabled/disabled through that API.  From Arianna
     Avanzini.

   - Various updates/fixes/improvements for blk-mq:

        - A set of updates from Bart, mostly fixing bugs in the tag
          handling.

        - Cleanup/code consolidation from Christoph.

        - Extend the queue_rq API to be able to handle batched issuing
          of IO requests. NVMe will utilize this shortly. From me. (A
          sketch of the extended interface follows this message.)

        - A few tag and request handling updates from me.

        - Cleanup of the preempt handling for running queues from Paolo.

        - Prevent running of unmapped hardware queues from Ming Lei.

        - Move the kdump memory limiting check to be in the correct
          location, from Shaohua.

        - Initialize all software queues at init time from Takashi. This
          prevents a kobject warning when CPUs are brought online that
          weren't online when a queue was registered.

   - Single writeback fix for I_DIRTY clearing from Tejun.  Queued with
     the core IO changes, since it's just a single fix.

   - Version X of the __bio_add_page() segment addition retry from
     Maurizio.  Hope the Xth time is the charm.

   - Documentation fixup for IO scheduler merging from Jan.

   - Introduce (and use) generic IO stat accounting helpers for non-rq
     drivers, from Gu Zheng.

   - Kill off artificial limiting of max sectors in a request from
     Christoph"

* 'for-3.19/core' of git://git.kernel.dk/linux-block: (26 commits)
  bio: modify __bio_add_page() to accept pages that don't start a new segment
  blk-mq: Fix uninitialized kobject at CPU hotplugging
  blktrace: don't let the sysfs interface remove trace from running list
  blk-mq: Use all available hardware queues
  blk-mq: Micro-optimize bt_get()
  blk-mq: Fix a race between bt_clear_tag() and bt_get()
  blk-mq: Avoid that __bt_get_word() wraps multiple times
  blk-mq: Fix a use-after-free
  blk-mq: prevent unmapped hw queue from being scheduled
  blk-mq: re-check for available tags after running the hardware queue
  blk-mq: fix hang in bt_get()
  blk-mq: move the kdump check to blk_mq_alloc_tag_set
  blk-mq: cleanup tag free handling
  blk-mq: use 'nr_cpu_ids' as highest CPU ID count for hwq <-> cpu map
  blk: introduce generic io stat accounting help function
  blk-mq: handle the single queue case in blk_mq_hctx_next_cpu
  genhd: check for int overflow in disk_expand_part_tbl()
  blk-mq: add blk_mq_free_hctx_request()
  blk-mq: export blk_mq_free_request()
  blk-mq: use get_cpu/put_cpu instead of preempt_disable/preempt_enable
  ...
torvalds committed Dec 13, 2014
2 parents 8f4385d + fcbf6a0 commit caf292a
Showing 21 changed files with 254 additions and 145 deletions.
6 changes: 5 additions & 1 deletion Documentation/block/biodoc.txt
@@ -942,7 +942,11 @@ elevator_allow_merge_fn called whenever the block layer determines
request safely. The io scheduler may still
want to stop a merge at this point if it
results in some sort of conflict internally,
- this hook allows it to do that.
+ this hook allows it to do that. Note however
+ that two *requests* can still be merged at later
+ time. Currently the io scheduler has no way to
+ prevent that. It can only learn about the fact
+ from elevator_merge_req_fn callback.

elevator_dispatch_fn* fills the dispatch queue with ready requests.
I/O schedulers are free to postpone requests by
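
The hook documented above is consulted per bio. A hedged sketch of an
io scheduler implementing it, assuming the elevator_ops signature of
this era (nonzero return allows the merge); myiosched_crosses_boundary()
is a hypothetical internal check:

static int myiosched_allow_merge(struct request_queue *q,
				 struct request *rq, struct bio *bio)
{
	/*
	 * Veto merging this bio into rq when it would cross an internal
	 * boundary. As the text above notes, the two resulting requests
	 * may still be merged later; the scheduler only learns of that
	 * through elevator_merge_req_fn.
	 */
	if (myiosched_crosses_boundary(q, rq, bio))
		return 0;

	return 1;
}
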
82 changes: 58 additions & 24 deletions block/bio.c
@@ -748,6 +748,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
}
}

+ bio->bi_iter.bi_size += len;
goto done;
}

@@ -764,28 +765,31 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
return 0;

/*
- * we might lose a segment or two here, but rather that than
- * make this too complex.
+ * setup the new entry, we might clear it again later if we
+ * cannot add the page
+ */
+ bvec = &bio->bi_io_vec[bio->bi_vcnt];
+ bvec->bv_page = page;
+ bvec->bv_len = len;
+ bvec->bv_offset = offset;
+ bio->bi_vcnt++;
+ bio->bi_phys_segments++;
+ bio->bi_iter.bi_size += len;
+
+ /*
+ * Perform a recount if the number of segments is greater
+ * than queue_max_segments(q).
*/

- while (bio->bi_phys_segments >= queue_max_segments(q)) {
+ while (bio->bi_phys_segments > queue_max_segments(q)) {

if (retried_segments)
- return 0;
+ goto failed;

retried_segments = 1;
blk_recount_segments(q, bio);
}

- /*
- * setup the new entry, we might clear it again later if we
- * cannot add the page
- */
- bvec = &bio->bi_io_vec[bio->bi_vcnt];
- bvec->bv_page = page;
- bvec->bv_len = len;
- bvec->bv_offset = offset;
-
/*
* if queue has other restrictions (eg varying max sector size
* depending on offset), it can specify a merge_bvec_fn in the
@@ -795,31 +799,33 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
struct bvec_merge_data bvm = {
.bi_bdev = bio->bi_bdev,
.bi_sector = bio->bi_iter.bi_sector,
- .bi_size = bio->bi_iter.bi_size,
+ .bi_size = bio->bi_iter.bi_size - len,
.bi_rw = bio->bi_rw,
};

/*
* merge_bvec_fn() returns number of bytes it can accept
* at this offset
*/
- if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
- bvec->bv_page = NULL;
- bvec->bv_len = 0;
- bvec->bv_offset = 0;
- return 0;
- }
+ if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len)
+ goto failed;
}

/* If we may be able to merge these biovecs, force a recount */
- if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
+ if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
bio->bi_flags &= ~(1 << BIO_SEG_VALID);

- bio->bi_vcnt++;
- bio->bi_phys_segments++;
done:
- bio->bi_iter.bi_size += len;
return len;

+ failed:
+ bvec->bv_page = NULL;
+ bvec->bv_len = 0;
+ bvec->bv_offset = 0;
+ bio->bi_vcnt--;
+ bio->bi_iter.bi_size -= len;
+ blk_recount_segments(q, bio);
+ return 0;
}

/**
@@ -1739,6 +1745,34 @@ void bio_check_pages_dirty(struct bio *bio)
}
}

+ void generic_start_io_acct(int rw, unsigned long sectors,
+ struct hd_struct *part)
+ {
+ int cpu = part_stat_lock();
+
+ part_round_stats(cpu, part);
+ part_stat_inc(cpu, part, ios[rw]);
+ part_stat_add(cpu, part, sectors[rw], sectors);
+ part_inc_in_flight(part, rw);
+
+ part_stat_unlock();
+ }
+ EXPORT_SYMBOL(generic_start_io_acct);
+
+ void generic_end_io_acct(int rw, struct hd_struct *part,
+ unsigned long start_time)
+ {
+ unsigned long duration = jiffies - start_time;
+ int cpu = part_stat_lock();
+
+ part_stat_add(cpu, part, ticks[rw], duration);
+ part_round_stats(cpu, part);
+ part_dec_in_flight(part, rw);
+
+ part_stat_unlock();
+ }
+ EXPORT_SYMBOL(generic_end_io_acct);
+
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
void bio_flush_dcache_pages(struct bio *bi)
{
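
The two helpers added above let bio-based (non-request) drivers feed
/proc/diskstats the same way rq-based drivers do. A usage sketch under
that assumption; the mydrv_* names are hypothetical, and a real driver
would usually account completion from its bio end_io path instead of
synchronously:

static void mydrv_make_request(struct request_queue *q, struct bio *bio)
{
	struct mydrv_device *dev = q->queuedata;
	int rw = bio_data_dir(bio);
	unsigned long start_time = jiffies;

	/* Bump the ios/sectors/in-flight counters for this disk. */
	generic_start_io_acct(rw, bio_sectors(bio), &dev->disk->part0);

	mydrv_do_transfer(dev, bio);		/* hypothetical data move */

	/* Account service time and drop the in-flight count. */
	generic_end_io_acct(rw, &dev->disk->part0, start_time);
	bio_endio(bio, 0);
}
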
3 changes: 3 additions & 0 deletions block/blk-core.c
@@ -525,6 +525,9 @@ void blk_cleanup_queue(struct request_queue *q)
del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
blk_sync_queue(q);

+ if (q->mq_ops)
+ blk_mq_free_queue(q);
+
spin_lock_irq(lock);
if (q->queue_lock != &q->__queue_lock)
q->queue_lock = &q->__queue_lock;
4 changes: 2 additions & 2 deletions block/blk-mq-cpumap.c
@@ -17,7 +17,7 @@
static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
const int cpu)
{
- return cpu / ((nr_cpus + nr_queues - 1) / nr_queues);
+ return cpu * nr_queues / nr_cpus;
}

static int get_first_sibling(unsigned int cpu)
@@ -90,7 +90,7 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
unsigned int *map;

/* If cpus are offline, map them to first hctx */
- map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL,
+ map = kzalloc_node(sizeof(*map) * nr_cpu_ids, GFP_KERNEL,
set->numa_node);
if (!map)
return NULL;
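
The one-line change above alters how CPUs spread across hardware
queues. A standalone userspace illustration of why the old rounding
could leave queues idle: with 4 CPUs and 3 queues the old formula maps
CPUs to queues {0, 0, 1, 1} and never uses queue 2, while the new one
yields {0, 0, 1, 2}:

#include <stdio.h>

static int old_map(int nr_cpus, int nr_queues, int cpu)
{
	return cpu / ((nr_cpus + nr_queues - 1) / nr_queues);
}

static int new_map(int nr_cpus, int nr_queues, int cpu)
{
	return cpu * nr_queues / nr_cpus;
}

int main(void)
{
	const int nr_cpus = 4, nr_queues = 3;
	int cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++)
		printf("cpu %d: old hctx %d, new hctx %d\n", cpu,
		       old_map(nr_cpus, nr_queues, cpu),
		       new_map(nr_cpus, nr_queues, cpu));
	return 0;
}
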
9 changes: 4 additions & 5 deletions block/blk-mq-sysfs.c
@@ -390,16 +390,15 @@ static void blk_mq_sysfs_init(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
- int i, j;
+ int i;

kobject_init(&q->mq_kobj, &blk_mq_ktype);

- queue_for_each_hw_ctx(q, hctx, i) {
+ queue_for_each_hw_ctx(q, hctx, i)
kobject_init(&hctx->kobj, &blk_mq_hw_ktype);

- hctx_for_each_ctx(hctx, ctx, j)
- kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
- }
+ queue_for_each_ctx(q, ctx, i)
+ kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
}

/* see blk_register_queue() */
Expand Down
60 changes: 31 additions & 29 deletions block/blk-mq-tag.c
@@ -137,6 +137,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
{
int tag, org_last_tag, end;
+ bool wrap = last_tag != 0;

org_last_tag = last_tag;
end = bm->depth;
@@ -148,15 +149,16 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
* We started with an offset, start from 0 to
* exhaust the map.
*/
- if (org_last_tag && last_tag) {
- end = last_tag;
+ if (wrap) {
+ wrap = false;
+ end = org_last_tag;
last_tag = 0;
goto restart;
}
return -1;
}
last_tag = tag + 1;
- } while (test_and_set_bit_lock(tag, &bm->word));
+ } while (test_and_set_bit(tag, &bm->word));

return tag;
}
@@ -246,14 +248,29 @@ static int bt_get(struct blk_mq_alloc_data *data,
if (!(data->gfp & __GFP_WAIT))
return -1;

- bs = bt_wait_ptr(bt, hctx);
do {
+ bs = bt_wait_ptr(bt, hctx);
prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);

tag = __bt_get(hctx, bt, last_tag);
if (tag != -1)
break;

+ /*
+ * We're out of tags on this hardware queue, kick any
+ * pending IO submits before going to sleep waiting for
+ * some to complete.
+ */
+ blk_mq_run_hw_queue(hctx, false);
+
+ /*
+ * Retry tag allocation after running the hardware queue,
+ * as running the queue may also have found completions.
+ */
+ tag = __bt_get(hctx, bt, last_tag);
+ if (tag != -1)
+ break;
+
blk_mq_put_ctx(data->ctx);

io_schedule();
@@ -268,8 +285,6 @@ static int bt_get(struct blk_mq_alloc_data *data,
hctx = data->hctx;
bt = &hctx->tags->bitmap_tags;
}
- finish_wait(&bs->wait, &wait);
- bs = bt_wait_ptr(bt, hctx);
} while (1);

finish_wait(&bs->wait, &wait);
@@ -340,11 +355,10 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
struct bt_wait_state *bs;
int wait_cnt;

- /*
- * The unlock memory barrier need to order access to req in free
- * path and clearing tag bit
- */
- clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word);
+ clear_bit(TAG_TO_BIT(bt, tag), &bt->map[index].word);
+
+ /* Ensure that the wait list checks occur after clear_bit(). */
+ smp_mb();

bs = bt_wake_ptr(bt);
if (!bs)
@@ -360,21 +374,6 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
}
}

- static void __blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag)
- {
- BUG_ON(tag >= tags->nr_tags);
-
- bt_clear_tag(&tags->bitmap_tags, tag);
- }
-
- static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
- unsigned int tag)
- {
- BUG_ON(tag >= tags->nr_reserved_tags);
-
- bt_clear_tag(&tags->breserved_tags, tag);
- }
-
void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
unsigned int *last_tag)
{
@@ -383,10 +382,13 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
if (tag >= tags->nr_reserved_tags) {
const int real_tag = tag - tags->nr_reserved_tags;

- __blk_mq_put_tag(tags, real_tag);
+ BUG_ON(real_tag >= tags->nr_tags);
+ bt_clear_tag(&tags->bitmap_tags, real_tag);
*last_tag = real_tag;
- } else
- __blk_mq_put_reserved_tag(tags, tag);
+ } else {
+ BUG_ON(tag >= tags->nr_reserved_tags);
+ bt_clear_tag(&tags->breserved_tags, tag);
+ }
}

static void bt_for_each(struct blk_mq_hw_ctx *hctx,
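
The __bt_get_word() change above bounds the zero-bit search to a single
wrap past the starting offset. A single-threaded userspace sketch of
that wrap-once logic; plain bit operations stand in for the kernel's
atomic find_next_zero_bit()/test_and_set_bit():

#include <stdbool.h>
#include <stdio.h>

#define DEPTH 8		/* tags per word, kept small for illustration */

static int find_next_zero(unsigned long word, int end, int start)
{
	int bit;

	for (bit = start; bit < end; bit++)
		if (!(word & (1UL << bit)))
			return bit;
	return end;
}

static bool test_and_set(unsigned long *word, int bit)
{
	bool was_set = *word & (1UL << bit);

	*word |= 1UL << bit;
	return was_set;
}

static int get_tag(unsigned long *word, int last_tag)
{
	int tag, org_last_tag = last_tag, end = DEPTH;
	bool wrap = last_tag != 0;

	do {
restart:
		tag = find_next_zero(*word, end, last_tag);
		if (tag >= end) {
			if (wrap) {	/* wrap to bit 0 at most once */
				wrap = false;
				end = org_last_tag;
				last_tag = 0;
				goto restart;
			}
			return -1;	/* map exhausted */
		}
		last_tag = tag + 1;
	} while (test_and_set(word, tag));

	return tag;
}

int main(void)
{
	unsigned long word = 0xec;	/* bits 2, 3, 5, 6, 7 already busy */

	printf("allocated tag %d\n", get_tag(&word, 4));	/* prints 4 */
	return 0;
}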