Skip to content

Commit

Permalink
Merge branch 'for-4.2/core' of git://git.kernel.dk/linux-block
Browse files Browse the repository at this point in the history
Pull core block IO update from Jens Axboe:
 "Nothing really major in here, mostly a collection of smaller
  optimizations and cleanups, mixed with various fixes.  In more detail,
  this contains:

   - Addition of policy specific data to blkcg for block cgroups.  From
     Arianna Avanzini.

   - Various cleanups around command types from Christoph.

   - Cleanup of the suspend block I/O path from Christoph.

   - Plugging updates from Shaohua and Jeff Moyer, for blk-mq.

   - Eliminating atomic inc/dec of both remaining IO count and reference
     count in a bio.  From me.

   - Fixes for SG gap and chunk size support for data-less (discards)
     IO, so we can merge these better.  From me.

   - Small restructuring of blk-mq shared tag support, freeing drivers
     from iterating hardware queues.  From Keith Busch.

   - A few cfq-iosched tweaks, from Tahsin Erdogan and me.  Makes the
     IOPS mode the default for non-rotational storage"

* 'for-4.2/core' of git://git.kernel.dk/linux-block: (35 commits)
  cfq-iosched: fix other locations where blkcg_to_cfqgd() can return NULL
  cfq-iosched: fix sysfs oops when attempting to read unconfigured weights
  cfq-iosched: move group scheduling functions under ifdef
  cfq-iosched: fix the setting of IOPS mode on SSDs
  blktrace: Add blktrace.c to BLOCK LAYER in MAINTAINERS file
  block, cgroup: implement policy-specific per-blkcg data
  block: Make CFQ default to IOPS mode on SSDs
  block: add blk_set_queue_dying() to blkdev.h
  blk-mq: Shared tag enhancements
  block: don't honor chunk sizes for data-less IO
  block: only honor SG gap prevention for merges that contain data
  block: fix returnvar.cocci warnings
  block, dm: don't copy bios for request clones
  block: remove management of bi_remaining when restoring original bi_end_io
  block: replace trylock with mutex_lock in blkdev_reread_part()
  block: export blkdev_reread_part() and __blkdev_reread_part()
  suspend: simplify block I/O handling
  block: collapse bio bit space
  block: remove unused BIO_RW_BLOCK and BIO_EOF flags
  block: remove BIO_EOPNOTSUPP
  ...
  • Loading branch information
torvalds committed Jun 25, 2015
2 parents cc8a0a9 + ae994ea commit bfffa1c
Show file tree
Hide file tree
Showing 64 changed files with 852 additions and 753 deletions.
1 change: 1 addition & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -2075,6 +2075,7 @@ M: Jens Axboe <axboe@kernel.dk>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
S: Maintained
F: block/
F: kernel/trace/blktrace.c

BLOCK2MTD DRIVER
M: Joern Engel <joern@lazybastard.org>
Expand Down
4 changes: 2 additions & 2 deletions block/bio-integrity.c
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)

/* Restore original bio completion handler */
bio->bi_end_io = bip->bip_end_io;
bio_endio_nodec(bio, error);
bio_endio(bio, error);
}

/**
Expand All @@ -388,7 +388,7 @@ void bio_integrity_endio(struct bio *bio, int error)
*/
if (error) {
bio->bi_end_io = bip->bip_end_io;
bio_endio_nodec(bio, error);
bio_endio(bio, error);

return;
}
Expand Down
77 changes: 47 additions & 30 deletions block/bio.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,8 @@ void bio_init(struct bio *bio)
{
memset(bio, 0, sizeof(*bio));
bio->bi_flags = 1 << BIO_UPTODATE;
atomic_set(&bio->bi_remaining, 1);
atomic_set(&bio->bi_cnt, 1);
atomic_set(&bio->__bi_remaining, 1);
atomic_set(&bio->__bi_cnt, 1);
}
EXPORT_SYMBOL(bio_init);

Expand All @@ -292,8 +292,8 @@ void bio_reset(struct bio *bio)
__bio_free(bio);

memset(bio, 0, BIO_RESET_BYTES);
bio->bi_flags = flags|(1 << BIO_UPTODATE);
atomic_set(&bio->bi_remaining, 1);
bio->bi_flags = flags | (1 << BIO_UPTODATE);
atomic_set(&bio->__bi_remaining, 1);
}
EXPORT_SYMBOL(bio_reset);

Expand All @@ -303,6 +303,17 @@ static void bio_chain_endio(struct bio *bio, int error)
bio_put(bio);
}

/*
* Increment chain count for the bio. Make sure the CHAIN flag update
* is visible before the raised count.
*
* @bio: the bio whose __bi_remaining count is raised
*/
static inline void bio_inc_remaining(struct bio *bio)
{
/*
* Flag the bio as chained first: bio_remaining_done() only examines
* __bi_remaining when BIO_CHAIN is set.
*/
bio->bi_flags |= (1 << BIO_CHAIN);
/* Order the flag store above before the atomic increment below. */
smp_mb__before_atomic();
atomic_inc(&bio->__bi_remaining);
}

/**
* bio_chain - chain bio completions
* @bio: the target bio
Expand All @@ -320,7 +331,7 @@ void bio_chain(struct bio *bio, struct bio *parent)

bio->bi_private = parent;
bio->bi_end_io = bio_chain_endio;
atomic_inc(&parent->bi_remaining);
bio_inc_remaining(parent);
}
EXPORT_SYMBOL(bio_chain);

Expand Down Expand Up @@ -524,13 +535,17 @@ EXPORT_SYMBOL(zero_fill_bio);
**/
void bio_put(struct bio *bio)
{
BIO_BUG_ON(!atomic_read(&bio->bi_cnt));

/*
* last put frees it
*/
if (atomic_dec_and_test(&bio->bi_cnt))
if (!bio_flagged(bio, BIO_REFFED))
bio_free(bio);
else {
BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));

/*
* last put frees it
*/
if (atomic_dec_and_test(&bio->__bi_cnt))
bio_free(bio);
}
}
EXPORT_SYMBOL(bio_put);

Expand Down Expand Up @@ -1741,6 +1756,25 @@ void bio_flush_dcache_pages(struct bio *bi)
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif

/*
* Decide whether completion of @bio may proceed.
*
* Returns true if @bio is not flagged BIO_CHAIN, or if this call
* decremented __bi_remaining to zero (BIO_CHAIN is cleared in that case).
* Returns false if the count is still non-zero after the decrement.
*/
static inline bool bio_remaining_done(struct bio *bio)
{
/*
* If we're not chaining, then ->__bi_remaining is always 1 and
* we always end io on the first invocation.
*/
if (!bio_flagged(bio, BIO_CHAIN))
return true;

/* A chained bio must still hold at least one outstanding count here. */
BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);

if (atomic_dec_and_test(&bio->__bi_remaining)) {
/* Count reached zero: drop the chain flag before reporting done. */
clear_bit(BIO_CHAIN, &bio->bi_flags);
return true;
}

return false;
}

/**
* bio_endio - end I/O on a bio
* @bio: bio
Expand All @@ -1758,15 +1792,13 @@ EXPORT_SYMBOL(bio_flush_dcache_pages);
void bio_endio(struct bio *bio, int error)
{
while (bio) {
BUG_ON(atomic_read(&bio->bi_remaining) <= 0);

if (error)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
error = -EIO;

if (!atomic_dec_and_test(&bio->bi_remaining))
return;
if (unlikely(!bio_remaining_done(bio)))
break;

/*
* Need to have a real endio function for chained bios,
Expand All @@ -1789,21 +1821,6 @@ void bio_endio(struct bio *bio, int error)
}
EXPORT_SYMBOL(bio_endio);

/**
* bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining
* @bio: bio
* @error: error, if any
*
* For code that has saved and restored bi_end_io; think hard before using this
* function, probably you should've cloned the entire bio.
**/
void bio_endio_nodec(struct bio *bio, int error)
{
/*
* Raise bi_remaining by one up front so that the decrement performed
* inside bio_endio() leaves the count where it was on entry.
*/
atomic_inc(&bio->bi_remaining);
bio_endio(bio, error);
}
EXPORT_SYMBOL(bio_endio_nodec);

/**
* bio_split - split a bio
* @bio: bio to split
Expand Down
92 changes: 81 additions & 11 deletions block/blk-cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
*
* Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
* Nauman Rafique <nauman@google.com>
*
* For policy-specific per-blkcg data:
* Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
* Arianna Avanzini <avanzini.arianna@gmail.com>
*/
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
Expand All @@ -26,8 +30,7 @@

static DEFINE_MUTEX(blkcg_pol_mutex);

struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT,
.cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, };
struct blkcg blkcg_root;
EXPORT_SYMBOL_GPL(blkcg_root);

static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
Expand Down Expand Up @@ -823,24 +826,58 @@ static struct cgroup_subsys_state *
blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct blkcg *blkcg;
struct cgroup_subsys_state *ret;
int i;

if (!parent_css) {
blkcg = &blkcg_root;
goto done;
}

blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
if (!blkcg)
return ERR_PTR(-ENOMEM);
if (!blkcg) {
ret = ERR_PTR(-ENOMEM);
goto free_blkcg;
}

for (i = 0; i < BLKCG_MAX_POLS ; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
struct blkcg_policy_data *cpd;

/*
* If the policy hasn't been attached yet, wait for it
* to be attached before doing anything else. Otherwise,
* check if the policy requires any specific per-cgroup
* data: if it does, allocate and initialize it.
*/
if (!pol || !pol->cpd_size)
continue;

BUG_ON(blkcg->pd[i]);
cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
if (!cpd) {
ret = ERR_PTR(-ENOMEM);
goto free_pd_blkcg;
}
blkcg->pd[i] = cpd;
cpd->plid = i;
pol->cpd_init_fn(blkcg);
}

blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
done:
spin_lock_init(&blkcg->lock);
INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
INIT_HLIST_HEAD(&blkcg->blkg_list);

return &blkcg->css;

free_pd_blkcg:
for (i--; i >= 0; i--)
kfree(blkcg->pd[i]);

free_blkcg:
kfree(blkcg);
return ret;
}

/**
Expand Down Expand Up @@ -958,8 +995,10 @@ int blkcg_activate_policy(struct request_queue *q,
const struct blkcg_policy *pol)
{
LIST_HEAD(pds);
LIST_HEAD(cpds);
struct blkcg_gq *blkg, *new_blkg;
struct blkg_policy_data *pd, *n;
struct blkg_policy_data *pd, *nd;
struct blkcg_policy_data *cpd, *cnd;
int cnt = 0, ret;
bool preloaded;

Expand Down Expand Up @@ -1003,34 +1042,61 @@ int blkcg_activate_policy(struct request_queue *q,

spin_unlock_irq(q->queue_lock);

/* allocate policy_data for all existing blkgs */
/*
* Allocate per-blkg and per-blkcg policy data
* for all existing blkgs.
*/
while (cnt--) {
pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
if (!pd) {
ret = -ENOMEM;
goto out_free;
}
list_add_tail(&pd->alloc_node, &pds);

if (!pol->cpd_size)
continue;
cpd = kzalloc_node(pol->cpd_size, GFP_KERNEL, q->node);
if (!cpd) {
ret = -ENOMEM;
goto out_free;
}
list_add_tail(&cpd->alloc_node, &cpds);
}

/*
* Install the allocated pds. With @q bypassing, no new blkg
* Install the allocated pds and cpds. With @q bypassing, no new blkg
* should have been created while the queue lock was dropped.
*/
spin_lock_irq(q->queue_lock);

list_for_each_entry(blkg, &q->blkg_list, q_node) {
if (WARN_ON(list_empty(&pds))) {
if (WARN_ON(list_empty(&pds)) ||
WARN_ON(pol->cpd_size && list_empty(&cpds))) {
/* umm... this shouldn't happen, just abort */
ret = -ENOMEM;
goto out_unlock;
}
cpd = list_first_entry(&cpds, struct blkcg_policy_data,
alloc_node);
list_del_init(&cpd->alloc_node);
pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
list_del_init(&pd->alloc_node);

/* grab blkcg lock too while installing @pd on @blkg */
spin_lock(&blkg->blkcg->lock);

if (!pol->cpd_size)
goto no_cpd;
if (!blkg->blkcg->pd[pol->plid]) {
/* Per-policy per-blkcg data */
blkg->blkcg->pd[pol->plid] = cpd;
cpd->plid = pol->plid;
pol->cpd_init_fn(blkg->blkcg);
} else { /* must free it as it has already been extracted */
kfree(cpd);
}
no_cpd:
blkg->pd[pol->plid] = pd;
pd->blkg = blkg;
pd->plid = pol->plid;
Expand All @@ -1045,8 +1111,10 @@ int blkcg_activate_policy(struct request_queue *q,
spin_unlock_irq(q->queue_lock);
out_free:
blk_queue_bypass_end(q);
list_for_each_entry_safe(pd, n, &pds, alloc_node)
list_for_each_entry_safe(pd, nd, &pds, alloc_node)
kfree(pd);
list_for_each_entry_safe(cpd, cnd, &cpds, alloc_node)
kfree(cpd);
return ret;
}
EXPORT_SYMBOL_GPL(blkcg_activate_policy);
Expand Down Expand Up @@ -1087,6 +1155,8 @@ void blkcg_deactivate_policy(struct request_queue *q,

kfree(blkg->pd[pol->plid]);
blkg->pd[pol->plid] = NULL;
kfree(blkg->blkcg->pd[pol->plid]);
blkg->blkcg->pd[pol->plid] = NULL;

spin_unlock(&blkg->blkcg->lock);
}
Expand Down
Loading

0 comments on commit bfffa1c

Please sign in to comment.