Skip to content

Commit

Permalink
Merge branch 'for-3.13/core' of git://git.kernel.dk/linux-block
Browse files Browse the repository at this point in the history
Pull block IO core updates from Jens Axboe:
 "This is the pull request for the core changes in the block layer for
  3.13.  It contains:

   - The new blk-mq request interface.

     This is a new and more scalable queueing model that marries the
     best part of the request based interface we currently have (which
     is fully featured, but scales poorly) and the bio based "interface"
     which the new drivers for high IOPS devices end up using because
     it's much faster than the request based one.

     The bio interface has no block layer support, since it taps into
     the stack much earlier.  This means that drivers end up having to
     implement a lot of functionality on their own, like tagging,
     timeout handling, requeue, etc.  The blk-mq interface provides all
     these.  Some drivers even provide a switch to select bio or rq and
     has code to handle both, since things like merging only works in
     the rq model and hence is faster for some workloads.  This is a
     huge mess.  Conversion of these drivers nets us a substantial code
     reduction.  Initial results on converting SCSI to this model even
     shows an 8x improvement on single queue devices.  So while the
     model was intended to work on the newer multiqueue devices, it has
     substantial improvements for "classic" hardware as well.  This code
     has gone through extensive testing and development, it's now ready
     to go.  A pull request is coming to convert virtio-blk to this
     model will be will be coming as well, with more drivers scheduled
     for 3.14 conversion.

   - Two blktrace fixes from Jan and Chen Gang.

   - A plug merge fix from Alireza Haghdoost.

   - Conversion of __get_cpu_var() from Christoph Lameter.

   - Fix for sector_div() with 64-bit divider from Geert Uytterhoeven.

   - A fix for a race between request completion and the timeout
     handling from Jeff Moyer.  This is what caused the merge conflict
     with blk-mq/core, in case you are looking at that.

   - A dm stacking fix from Mike Snitzer.

   - A code consolidation fix and duplicated code removal from Kent
     Overstreet.

   - A handful of block bug fixes from Mikulas Patocka, fixing a loop
     crash and memory corruption on blk cg.

   - Elevator switch bug fix from Tomoki Sekiyama.

  A heads-up that I had to rebase this branch.  Initially the immutable
  bio_vecs had been queued up for inclusion, but a week later, it became
  clear that it wasn't fully cooked yet.  So the decision was made to
  pull this out and postpone it until 3.14.  It was a straight forward
  rebase, just pruning out the immutable series and the later fixes of
  problems with it.  The rest of the patches applied directly and no
  further changes were made"

* 'for-3.13/core' of git://git.kernel.dk/linux-block: (31 commits)
  block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  block: Do not call sector_div() with a 64-bit divisor
  kernel: trace: blktrace: remove redundent memcpy() in compat_blk_trace_setup()
  block: Consolidate duplicated bio_trim() implementations
  block: Use rw_copy_check_uvector()
  block: Enable sysfs nomerge control for I/O requests in the plug list
  block: properly stack underlying max_segment_size to DM device
  elevator: acquire q->sysfs_lock in elevator_change()
  elevator: Fix a race in elevator switching and md device initialization
  block: Replace __get_cpu_var uses
  bdi: test bdi_init failure
  block: fix a probe argument to blk_register_region
  loop: fix crash if blk_alloc_queue fails
  blk-core: Fix memory corruption if blkcg_init_queue fails
  block: fix race between request completion and timeout handling
  blktrace: Send BLK_TN_PROCESS events to all running traces
  blk-mq: don't disallow request merges for req->special being set
  blk-mq: mq plug list breakage
  blk-mq: fix for flush deadlock
  ...
  • Loading branch information
torvalds committed Nov 14, 2013
2 parents 2821fe6 + e37459b commit 0910c0b
Show file tree
Hide file tree
Showing 49 changed files with 3,885 additions and 364 deletions.
5 changes: 3 additions & 2 deletions block/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o \
partition-generic.o partitions/
blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o partitions/

obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
Expand Down
175 changes: 109 additions & 66 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
Expand Down Expand Up @@ -48,7 +49,7 @@ DEFINE_IDA(blk_queue_ida);
/*
* For the allocated request tables
*/
static struct kmem_cache *request_cachep;
struct kmem_cache *request_cachep = NULL;

/*
* For queue allocation
Expand All @@ -60,42 +61,6 @@ struct kmem_cache *blk_requestq_cachep;
*/
static struct workqueue_struct *kblockd_workqueue;

static void drive_stat_acct(struct request *rq, int new_io)
{
struct hd_struct *part;
int rw = rq_data_dir(rq);
int cpu;

if (!blk_do_io_stat(rq))
return;

cpu = part_stat_lock();

if (!new_io) {
part = rq->part;
part_stat_inc(cpu, part, merges[rw]);
} else {
part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
if (!hd_struct_try_get(part)) {
/*
* The partition is already being removed,
* the request will be accounted on the disk only
*
* We take a reference on disk->part0 although that
* partition will never be deleted, so we can treat
* it as any other partition.
*/
part = &rq->rq_disk->part0;
hd_struct_get(part);
}
part_round_stats(cpu, part);
part_inc_in_flight(part, rw);
rq->part = part;
}

part_stat_unlock();
}

void blk_queue_congestion_threshold(struct request_queue *q)
{
int nr;
Expand Down Expand Up @@ -145,7 +110,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->cmd = rq->__cmd;
rq->cmd_len = BLK_MAX_CDB;
rq->tag = -1;
rq->ref_count = 1;
rq->start_time = jiffies;
set_start_time_ns(rq);
rq->part = NULL;
Expand Down Expand Up @@ -174,9 +138,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
{
int bit;

printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
printk(KERN_INFO "%s: dev %s: type=%x, flags=%llx\n", msg,
rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
rq->cmd_flags);
(unsigned long long) rq->cmd_flags);

printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
(unsigned long long)blk_rq_pos(rq),
Expand Down Expand Up @@ -595,9 +559,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
if (!q)
return NULL;

if (percpu_counter_init(&q->mq_usage_counter, 0))
goto fail_q;

q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
if (q->id < 0)
goto fail_q;
goto fail_c;

q->backing_dev_info.ra_pages =
(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
Expand Down Expand Up @@ -644,13 +611,19 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
q->bypass_depth = 1;
__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);

init_waitqueue_head(&q->mq_freeze_wq);

if (blkcg_init_queue(q))
goto fail_id;
goto fail_bdi;

return q;

fail_bdi:
bdi_destroy(&q->backing_dev_info);
fail_id:
ida_simple_remove(&blk_queue_ida, q->id);
fail_c:
percpu_counter_destroy(&q->mq_usage_counter);
fail_q:
kmem_cache_free(blk_requestq_cachep, q);
return NULL;
Expand Down Expand Up @@ -739,9 +712,17 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,

q->sg_reserved_size = INT_MAX;

/* Protect q->elevator from elevator_change */
mutex_lock(&q->sysfs_lock);

/* init elevator */
if (elevator_init(q, NULL))
if (elevator_init(q, NULL)) {
mutex_unlock(&q->sysfs_lock);
return NULL;
}

mutex_unlock(&q->sysfs_lock);

return q;
}
EXPORT_SYMBOL(blk_init_allocated_queue);
Expand Down Expand Up @@ -1109,7 +1090,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
goto retry;
}

struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
static struct request *blk_old_get_request(struct request_queue *q, int rw,
gfp_t gfp_mask)
{
struct request *rq;

Expand All @@ -1126,6 +1108,14 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)

return rq;
}

struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
{
if (q->mq_ops)
return blk_mq_alloc_request(q, rw, gfp_mask, false);
else
return blk_old_get_request(q, rw, gfp_mask);
}
EXPORT_SYMBOL(blk_get_request);

/**
Expand Down Expand Up @@ -1211,7 +1201,7 @@ EXPORT_SYMBOL(blk_requeue_request);
static void add_acct_request(struct request_queue *q, struct request *rq,
int where)
{
drive_stat_acct(rq, 1);
blk_account_io_start(rq, true);
__elv_add_request(q, rq, where);
}

Expand Down Expand Up @@ -1272,8 +1262,6 @@ void __blk_put_request(struct request_queue *q, struct request *req)
{
if (unlikely(!q))
return;
if (unlikely(--req->ref_count))
return;

blk_pm_put_request(req);

Expand Down Expand Up @@ -1302,12 +1290,17 @@ EXPORT_SYMBOL_GPL(__blk_put_request);

void blk_put_request(struct request *req)
{
unsigned long flags;
struct request_queue *q = req->q;

spin_lock_irqsave(q->queue_lock, flags);
__blk_put_request(q, req);
spin_unlock_irqrestore(q->queue_lock, flags);
if (q->mq_ops)
blk_mq_free_request(req);
else {
unsigned long flags;

spin_lock_irqsave(q->queue_lock, flags);
__blk_put_request(q, req);
spin_unlock_irqrestore(q->queue_lock, flags);
}
}
EXPORT_SYMBOL(blk_put_request);

Expand Down Expand Up @@ -1343,8 +1336,8 @@ void blk_add_request_payload(struct request *rq, struct page *page,
}
EXPORT_SYMBOL_GPL(blk_add_request_payload);

static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
struct bio *bio)
bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
struct bio *bio)
{
const int ff = bio->bi_rw & REQ_FAILFAST_MASK;

Expand All @@ -1361,12 +1354,12 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
req->__data_len += bio->bi_size;
req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

drive_stat_acct(req, 0);
blk_account_io_start(req, false);
return true;
}

static bool bio_attempt_front_merge(struct request_queue *q,
struct request *req, struct bio *bio)
bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
struct bio *bio)
{
const int ff = bio->bi_rw & REQ_FAILFAST_MASK;

Expand All @@ -1391,12 +1384,12 @@ static bool bio_attempt_front_merge(struct request_queue *q,
req->__data_len += bio->bi_size;
req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

drive_stat_acct(req, 0);
blk_account_io_start(req, false);
return true;
}

/**
* attempt_plug_merge - try to merge with %current's plugged list
* blk_attempt_plug_merge - try to merge with %current's plugged list
* @q: request_queue new bio is being queued at
* @bio: new bio being queued
* @request_count: out parameter for number of traversed plugged requests
Expand All @@ -1412,19 +1405,28 @@ static bool bio_attempt_front_merge(struct request_queue *q,
* reliable access to the elevator outside queue lock. Only check basic
* merging parameters without querying the elevator.
*/
static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int *request_count)
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int *request_count)
{
struct blk_plug *plug;
struct request *rq;
bool ret = false;
struct list_head *plug_list;

if (blk_queue_nomerges(q))
goto out;

plug = current->plug;
if (!plug)
goto out;
*request_count = 0;

list_for_each_entry_reverse(rq, &plug->list, queuelist) {
if (q->mq_ops)
plug_list = &plug->mq_list;
else
plug_list = &plug->list;

list_for_each_entry_reverse(rq, plug_list, queuelist) {
int el_ret;

if (rq->q == q)
Expand Down Expand Up @@ -1492,7 +1494,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
* Check if we can merge with the plugged list before grabbing
* any locks.
*/
if (attempt_plug_merge(q, bio, &request_count))
if (blk_attempt_plug_merge(q, bio, &request_count))
return;

spin_lock_irq(q->queue_lock);
Expand Down Expand Up @@ -1560,7 +1562,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
}
}
list_add_tail(&req->queuelist, &plug->list);
drive_stat_acct(req, 1);
blk_account_io_start(req, true);
} else {
spin_lock_irq(q->queue_lock);
add_acct_request(q, req, where);
Expand Down Expand Up @@ -2014,7 +2016,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
}
EXPORT_SYMBOL_GPL(blk_rq_err_bytes);

static void blk_account_io_completion(struct request *req, unsigned int bytes)
void blk_account_io_completion(struct request *req, unsigned int bytes)
{
if (blk_do_io_stat(req)) {
const int rw = rq_data_dir(req);
Expand All @@ -2028,7 +2030,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
}
}

static void blk_account_io_done(struct request *req)
void blk_account_io_done(struct request *req)
{
/*
* Account IO completion. flush_rq isn't accounted as a
Expand Down Expand Up @@ -2076,6 +2078,42 @@ static inline struct request *blk_pm_peek_request(struct request_queue *q,
}
#endif

void blk_account_io_start(struct request *rq, bool new_io)
{
struct hd_struct *part;
int rw = rq_data_dir(rq);
int cpu;

if (!blk_do_io_stat(rq))
return;

cpu = part_stat_lock();

if (!new_io) {
part = rq->part;
part_stat_inc(cpu, part, merges[rw]);
} else {
part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
if (!hd_struct_try_get(part)) {
/*
* The partition is already being removed,
* the request will be accounted on the disk only
*
* We take a reference on disk->part0 although that
* partition will never be deleted, so we can treat
* it as any other partition.
*/
part = &rq->rq_disk->part0;
hd_struct_get(part);
}
part_round_stats(cpu, part);
part_inc_in_flight(part, rw);
rq->part = part;
}

part_stat_unlock();
}

/**
* blk_peek_request - peek at the top of a request queue
* @q: request queue to peek at
Expand Down Expand Up @@ -2227,6 +2265,7 @@ void blk_start_request(struct request *req)
if (unlikely(blk_bidi_rq(req)))
req->next_rq->resid_len = blk_rq_bytes(req->next_rq);

BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
blk_add_timer(req);
}
EXPORT_SYMBOL(blk_start_request);
Expand Down Expand Up @@ -2451,7 +2490,6 @@ static void blk_finish_request(struct request *req, int error)
if (req->cmd_flags & REQ_DONTPREP)
blk_unprep_request(req);


blk_account_io_done(req);

if (req->end_io)
Expand Down Expand Up @@ -2873,6 +2911,7 @@ void blk_start_plug(struct blk_plug *plug)

plug->magic = PLUG_MAGIC;
INIT_LIST_HEAD(&plug->list);
INIT_LIST_HEAD(&plug->mq_list);
INIT_LIST_HEAD(&plug->cb_list);

/*
Expand Down Expand Up @@ -2970,6 +3009,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
BUG_ON(plug->magic != PLUG_MAGIC);

flush_plug_callbacks(plug, from_schedule);

if (!list_empty(&plug->mq_list))
blk_mq_flush_plug_list(plug, from_schedule);

if (list_empty(&plug->list))
return;

Expand Down
Loading

0 comments on commit 0910c0b

Please sign in to comment.