Skip to content

Commit

Permalink
block: Introduce new bio_split()
Browse files Browse the repository at this point in the history
The new bio_split() can split arbitrary bios - it's not restricted to
single page bios, like the old bio_split() (previously renamed to
bio_pair_split()). It also has different semantics - it doesn't allocate
a struct bio_pair, leaving it up to the caller to handle completions.

Then convert the existing bio_pair_split() users to the new bio_split()
- and also nvme, which was open coding bio splitting.

(We have to take that BUG_ON() out of bio_integrity_trim() because this
bio_split() needs to use it, and there's no reason it has to be used on
bios marked as cloned; BIO_CLONED doesn't seem to have clearly
documented semantics anyways.)

Signed-off-by: Kent Overstreet <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Martin K. Petersen <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Keith Busch <[email protected]>
Cc: Vishal Verma <[email protected]>
Cc: Jiri Kosina <[email protected]>
Cc: Neil Brown <[email protected]>
  • Loading branch information
Kent Overstreet committed Nov 24, 2013
1 parent ee67891 commit 20d0189
Show file tree
Hide file tree
Showing 10 changed files with 272 additions and 409 deletions.
106 changes: 9 additions & 97 deletions drivers/block/nvme-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -441,104 +441,19 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
return total_len;
}

struct nvme_bio_pair {
struct bio b1, b2, *parent;
struct bio_vec *bv1, *bv2;
int err;
atomic_t cnt;
};

static void nvme_bio_pair_endio(struct bio *bio, int err)
{
struct nvme_bio_pair *bp = bio->bi_private;

if (err)
bp->err = err;

if (atomic_dec_and_test(&bp->cnt)) {
bio_endio(bp->parent, bp->err);
kfree(bp->bv1);
kfree(bp->bv2);
kfree(bp);
}
}

static struct nvme_bio_pair *nvme_bio_split(struct bio *bio, int idx,
int len, int offset)
{
struct nvme_bio_pair *bp;

BUG_ON(len > bio->bi_iter.bi_size);
BUG_ON(idx > bio->bi_vcnt);

bp = kmalloc(sizeof(*bp), GFP_ATOMIC);
if (!bp)
return NULL;
bp->err = 0;

bp->b1 = *bio;
bp->b2 = *bio;

bp->b1.bi_iter.bi_size = len;
bp->b2.bi_iter.bi_size -= len;
bp->b1.bi_vcnt = idx;
bp->b2.bi_iter.bi_idx = idx;
bp->b2.bi_iter.bi_sector += len >> 9;

if (offset) {
bp->bv1 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
GFP_ATOMIC);
if (!bp->bv1)
goto split_fail_1;

bp->bv2 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
GFP_ATOMIC);
if (!bp->bv2)
goto split_fail_2;

memcpy(bp->bv1, bio->bi_io_vec,
bio->bi_max_vecs * sizeof(struct bio_vec));
memcpy(bp->bv2, bio->bi_io_vec,
bio->bi_max_vecs * sizeof(struct bio_vec));

bp->b1.bi_io_vec = bp->bv1;
bp->b2.bi_io_vec = bp->bv2;
bp->b2.bi_io_vec[idx].bv_offset += offset;
bp->b2.bi_io_vec[idx].bv_len -= offset;
bp->b1.bi_io_vec[idx].bv_len = offset;
bp->b1.bi_vcnt++;
} else
bp->bv1 = bp->bv2 = NULL;

bp->b1.bi_private = bp;
bp->b2.bi_private = bp;

bp->b1.bi_end_io = nvme_bio_pair_endio;
bp->b2.bi_end_io = nvme_bio_pair_endio;

bp->parent = bio;
atomic_set(&bp->cnt, 2);

return bp;

split_fail_2:
kfree(bp->bv1);
split_fail_1:
kfree(bp);
return NULL;
}

static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
int idx, int len, int offset)
int len)
{
struct nvme_bio_pair *bp = nvme_bio_split(bio, idx, len, offset);
if (!bp)
struct bio *split = bio_split(bio, len >> 9, GFP_ATOMIC, NULL);
if (!split)
return -ENOMEM;

bio_chain(split, bio);

if (bio_list_empty(&nvmeq->sq_cong))
add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
bio_list_add(&nvmeq->sq_cong, &bp->b1);
bio_list_add(&nvmeq->sq_cong, &bp->b2);
bio_list_add(&nvmeq->sq_cong, split);
bio_list_add(&nvmeq->sq_cong, bio);

return 0;
}
Expand Down Expand Up @@ -568,8 +483,7 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
} else {
if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec))
return nvme_split_and_submit(bio, nvmeq,
iter.bi_idx,
length, 0);
length);

sg = sg ? sg + 1 : iod->sg;
sg_set_page(sg, bvec.bv_page,
Expand All @@ -578,9 +492,7 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
}

if (split_len - length < bvec.bv_len)
return nvme_split_and_submit(bio, nvmeq, iter.bi_idx,
split_len,
split_len - length);
return nvme_split_and_submit(bio, nvmeq, split_len);
length += bvec.bv_len;
bvprv = bvec;
first = 0;
Expand Down
136 changes: 74 additions & 62 deletions drivers/block/pktcdvd.c
Original file line number Diff line number Diff line change
Expand Up @@ -2338,75 +2338,29 @@ static void pkt_end_io_read_cloned(struct bio *bio, int err)
pkt_bio_finished(pd);
}

static void pkt_make_request(struct request_queue *q, struct bio *bio)
static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
{
struct pktcdvd_device *pd;
char b[BDEVNAME_SIZE];
struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);

psd->pd = pd;
psd->bio = bio;
cloned_bio->bi_bdev = pd->bdev;
cloned_bio->bi_private = psd;
cloned_bio->bi_end_io = pkt_end_io_read_cloned;
pd->stats.secs_r += bio_sectors(bio);
pkt_queue_bio(pd, cloned_bio);
}

static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
{
struct pktcdvd_device *pd = q->queuedata;
sector_t zone;
struct packet_data *pkt;
int was_empty, blocked_bio;
struct pkt_rb_node *node;

pd = q->queuedata;
if (!pd) {
pr_err("%s incorrect request queue\n",
bdevname(bio->bi_bdev, b));
goto end_io;
}

/*
* Clone READ bios so we can have our own bi_end_io callback.
*/
if (bio_data_dir(bio) == READ) {
struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);

psd->pd = pd;
psd->bio = bio;
cloned_bio->bi_bdev = pd->bdev;
cloned_bio->bi_private = psd;
cloned_bio->bi_end_io = pkt_end_io_read_cloned;
pd->stats.secs_r += bio_sectors(bio);
pkt_queue_bio(pd, cloned_bio);
return;
}

if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
pkt_notice(pd, "WRITE for ro device (%llu)\n",
(unsigned long long)bio->bi_iter.bi_sector);
goto end_io;
}

if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
pkt_err(pd, "wrong bio size\n");
goto end_io;
}

blk_queue_bounce(q, &bio);

zone = get_zone(bio->bi_iter.bi_sector, pd);
pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
(unsigned long long)bio->bi_iter.bi_sector,
(unsigned long long)bio_end_sector(bio));

/* Check if we have to split the bio */
{
struct bio_pair *bp;
sector_t last_zone;
int first_sectors;

last_zone = get_zone(bio_end_sector(bio) - 1, pd);
if (last_zone != zone) {
BUG_ON(last_zone != zone + pd->settings.size);
first_sectors = last_zone - bio->bi_iter.bi_sector;
bp = bio_pair_split(bio, first_sectors);
BUG_ON(!bp);
pkt_make_request(q, &bp->bio1);
pkt_make_request(q, &bp->bio2);
bio_pair_release(bp);
return;
}
}

/*
* If we find a matching packet in state WAITING or READ_WAIT, we can
Expand Down Expand Up @@ -2480,6 +2434,64 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
*/
wake_up(&pd->wqueue);
}
}

static void pkt_make_request(struct request_queue *q, struct bio *bio)
{
struct pktcdvd_device *pd;
char b[BDEVNAME_SIZE];
struct bio *split;

pd = q->queuedata;
if (!pd) {
pr_err("%s incorrect request queue\n",
bdevname(bio->bi_bdev, b));
goto end_io;
}

pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
(unsigned long long)bio->bi_iter.bi_sector,
(unsigned long long)bio_end_sector(bio));

/*
* Clone READ bios so we can have our own bi_end_io callback.
*/
if (bio_data_dir(bio) == READ) {
pkt_make_request_read(pd, bio);
return;
}

if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
pkt_notice(pd, "WRITE for ro device (%llu)\n",
(unsigned long long)bio->bi_iter.bi_sector);
goto end_io;
}

if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
pkt_err(pd, "wrong bio size\n");
goto end_io;
}

blk_queue_bounce(q, &bio);

do {
sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);

if (last_zone != zone) {
BUG_ON(last_zone != zone + pd->settings.size);

split = bio_split(bio, last_zone -
bio->bi_iter.bi_sector,
GFP_NOIO, fs_bio_set);
bio_chain(split, bio);
} else {
split = bio;
}

pkt_make_request_write(q, split);
} while (split != bio);

return;
end_io:
bio_io_error(bio);
Expand Down
1 change: 0 additions & 1 deletion drivers/md/bcache/bcache.h
Original file line number Diff line number Diff line change
Expand Up @@ -901,7 +901,6 @@ void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
void bch_bbio_free(struct bio *, struct cache_set *);
struct bio *bch_bbio_alloc(struct cache_set *);

struct bio *bch_bio_split(struct bio *, int, gfp_t, struct bio_set *);
void bch_generic_make_request(struct bio *, struct bio_split_pool *);
void __bch_submit_bbio(struct bio *, struct cache_set *);
void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
Expand Down
82 changes: 2 additions & 80 deletions drivers/md/bcache/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,84 +11,6 @@

#include <linux/blkdev.h>

/**
* bch_bio_split - split a bio
* @bio: bio to split
* @sectors: number of sectors to split from the front of @bio
* @gfp: gfp mask
* @bs: bio set to allocate from
*
* Allocates and returns a new bio which represents @sectors from the start of
* @bio, and updates @bio to represent the remaining sectors.
*
* If bio_sectors(@bio) was less than or equal to @sectors, returns @bio
* unchanged.
*
* The newly allocated bio will point to @bio's bi_io_vec, if the split was on a
* bvec boundry; it is the caller's responsibility to ensure that @bio is not
* freed before the split.
*/
struct bio *bch_bio_split(struct bio *bio, int sectors,
gfp_t gfp, struct bio_set *bs)
{
unsigned vcnt = 0, nbytes = sectors << 9;
struct bio_vec bv;
struct bvec_iter iter;
struct bio *ret = NULL;

BUG_ON(sectors <= 0);

if (sectors >= bio_sectors(bio))
return bio;

if (bio->bi_rw & REQ_DISCARD) {
ret = bio_alloc_bioset(gfp, 1, bs);
if (!ret)
return NULL;
goto out;
}

bio_for_each_segment(bv, bio, iter) {
vcnt++;

if (nbytes <= bv.bv_len)
break;

nbytes -= bv.bv_len;
}

ret = bio_alloc_bioset(gfp, vcnt, bs);
if (!ret)
return NULL;

bio_for_each_segment(bv, bio, iter) {
ret->bi_io_vec[ret->bi_vcnt++] = bv;

if (ret->bi_vcnt == vcnt)
break;
}

ret->bi_io_vec[ret->bi_vcnt - 1].bv_len = nbytes;
out:
ret->bi_bdev = bio->bi_bdev;
ret->bi_iter.bi_sector = bio->bi_iter.bi_sector;
ret->bi_iter.bi_size = sectors << 9;
ret->bi_rw = bio->bi_rw;

if (bio_integrity(bio)) {
if (bio_integrity_clone(ret, bio, gfp)) {
bio_put(ret);
return NULL;
}

bio_integrity_trim(ret, 0, bio_sectors(ret));
}

bio_advance(bio, ret->bi_iter.bi_size);

return ret;
}

static unsigned bch_bio_max_sectors(struct bio *bio)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
Expand Down Expand Up @@ -172,8 +94,8 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
bio_get(bio);

do {
n = bch_bio_split(bio, bch_bio_max_sectors(bio),
GFP_NOIO, s->p->bio_split);
n = bio_next_split(bio, bch_bio_max_sectors(bio),
GFP_NOIO, s->p->bio_split);

n->bi_end_io = bch_bio_submit_split_endio;
n->bi_private = &s->cl;
Expand Down
Loading

0 comments on commit 20d0189

Please sign in to comment.