aoe: become I/O request queue handler for increased user control
To allow users to choose an elevator algorithm for their particular
workloads, change from a make_request-style driver to an
I/O-request-queue-handler-style driver.
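For illustration only (not part of the patch): a minimal sketch of the two driver styles against the 3.x-era block API that the patch itself uses (blk_init_queue, blk_peek_request, blk_start_request).  The example_* names are hypothetical.

#include <linux/blkdev.h>

/* make_request style: the driver is handed raw bios; no elevator runs. */
static void example_make_request(struct request_queue *q, struct bio *bio)
{
	/* the driver must split and dispatch the bio itself; placeholder
	 * completion so the sketch is self-contained */
	bio_endio(bio, 0);
}

/* request_fn style: the block layer queues, merges, and elevator-sorts
 * requests; the driver pulls them off the queue under the queue lock. */
static void example_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_peek_request(q)) != NULL) {
		blk_start_request(rq);		/* take ownership of the request */
		/* ... translate rq into AoE frames and transmit ... */
		__blk_end_request_all(rq, 0);	/* placeholder completion */
	}
}

Registration then changes from blk_alloc_queue() plus blk_queue_make_request() to blk_init_queue(aoeblk_request, &d->lock), as the aoeblk.c hunk below shows.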

We have to do a couple of things that might be surprising.  We manipulate
the page _count directly on the assumption that we still have no guarantee
that users of the block layer are prohibited from submitting bios
containing pages with zero reference counts.[1] If such a prohibition now
exists, I can get rid of the _count manipulation.
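As a rough sketch of what that _count manipulation looks like (an assumption about the shape of the code, not the patch verbatim; the helper names are hypothetical): the driver pins each data page for the lifetime of the I/O by bumping the reference count directly, since get_page()/put_page() assume a non-zero starting count.

#include <linux/bio.h>
#include <linux/mm.h>

/* take an extra reference on every page backing the bio's data */
static void example_bio_page_hold(struct bio *bio)
{
	struct bio_vec *bv;
	int i;

	bio_for_each_segment(bv, bio, i)
		atomic_inc(&bv->bv_page->_count);
}

/* drop the extra references once the I/O has fully completed */
static void example_bio_page_release(struct bio *bio)
{
	struct bio_vec *bv;
	int i;

	bio_for_each_segment(bv, bio, i)
		atomic_dec(&bv->bv_page->_count);
}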

Just as before this patch, we keep track of the sk_buffs that the
network layer hasn't finished with yet, and we cap the resources we
use with a "pool" of skbs.[2]

Now that the block layer maintains the disk stats, the aoe driver's
diskstats function can go away.
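In other words, once requests flow through the block layer's request path and are completed with the blk_end_request family, the per-disk accounting happens in the block layer, and the driver's completion helper only needs something like the sketch below (a guess at the general shape, with a hypothetical name and error convention, not the patch's aoe_end_request).

#include <linux/blkdev.h>

/* called with the queue lock (d->lock) held */
static void example_end_request(struct request *rq, int error)
{
	/* completes every bio in rq; I/O stats are updated by the block layer */
	__blk_end_request_all(rq, error);
}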

1. https://lkml.org/lkml/2007/3/1/374
2. https://lkml.org/lkml/2007/7/6/241

Signed-off-by: Ed Cashin <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
ecashin authored and torvalds committed Oct 5, 2012
1 parent 896831f commit 69cf2d8
Showing 5 changed files with 308 additions and 182 deletions.
26 changes: 14 additions & 12 deletions drivers/block/aoe/aoe.h
@@ -90,7 +90,7 @@ enum {
MIN_BUFS = 16,
NTARGETS = 8,
NAOEIFS = 8,
NSKBPOOLMAX = 128,
NSKBPOOLMAX = 256,
NFACTIVE = 17,

TIMERTICK = HZ / 10,
@@ -100,30 +100,26 @@
};

struct buf {
struct list_head bufs;
ulong stime; /* for disk stats */
ulong flags;
ulong nframesout;
ulong resid;
ulong bv_resid;
ulong bv_off;
sector_t sector;
struct bio *bio;
struct bio_vec *bv;
struct request *rq;
};

struct frame {
struct list_head head;
u32 tag;
ulong waited;
struct buf *buf;
struct aoetgt *t; /* parent target I belong to */
char *bufaddr;
ulong bcnt;
sector_t lba;
struct sk_buff *skb; /* command skb freed on module exit */
struct sk_buff *r_skb; /* response skb for async processing */
struct buf *buf;
struct bio_vec *bv;
ulong bcnt;
ulong bv_off;
};

@@ -161,18 +157,21 @@ struct aoedev {
u16 rttavg; /* round trip average of requests/responses */
u16 mintimer;
u16 fw_ver; /* version of blade's firmware */
ulong ref;
struct work_struct work;/* disk create work struct */
struct gendisk *gd;
struct request_queue *blkq;
struct hd_geometry geo;
sector_t ssize;
struct timer_list timer;
spinlock_t lock;
struct sk_buff_head sendq;
struct sk_buff_head skbpool;
mempool_t *bufpool; /* for deadlock-free Buf allocation */
struct list_head bufq; /* queue of bios to work on */
struct buf *inprocess; /* the one we're currently working on */
struct { /* pointers to work in progress */
struct buf *buf;
struct bio *nxbio;
struct request *rq;
} ip;
struct aoetgt *targets[NTARGETS];
struct aoetgt **tgt; /* target in use when working */
struct aoetgt *htgt; /* target needing rexmit assistance */
@@ -209,14 +208,17 @@ void aoecmd_exit(void);
int aoecmd_init(void);
struct sk_buff *aoecmd_ata_id(struct aoedev *);
void aoe_freetframe(struct frame *);
void aoe_flush_iocq(void);
void aoe_end_request(struct aoedev *, struct request *, int);

int aoedev_init(void);
void aoedev_exit(void);
struct aoedev *aoedev_by_aoeaddr(int maj, int min);
struct aoedev *aoedev_by_sysminor_m(ulong sysminor);
void aoedev_downdev(struct aoedev *d);
int aoedev_flush(const char __user *str, size_t size);
void aoe_failbuf(struct aoedev *d, struct buf *buf);
void aoe_failbuf(struct aoedev *, struct buf *);
void aoedev_put(struct aoedev *);

int aoenet_init(void);
void aoenet_exit(void);
88 changes: 26 additions & 62 deletions drivers/block/aoe/aoeblk.c
@@ -161,68 +161,22 @@ aoeblk_release(struct gendisk *disk, fmode_t mode)
}

static void
aoeblk_make_request(struct request_queue *q, struct bio *bio)
aoeblk_request(struct request_queue *q)
{
struct sk_buff_head queue;
struct aoedev *d;
struct buf *buf;
ulong flags;

blk_queue_bounce(q, &bio);

if (bio == NULL) {
printk(KERN_ERR "aoe: bio is NULL\n");
BUG();
return;
}
d = bio->bi_bdev->bd_disk->private_data;
if (d == NULL) {
printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n");
BUG();
bio_endio(bio, -ENXIO);
return;
} else if (bio->bi_io_vec == NULL) {
printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
BUG();
bio_endio(bio, -ENXIO);
return;
}
buf = mempool_alloc(d->bufpool, GFP_NOIO);
if (buf == NULL) {
printk(KERN_INFO "aoe: buf allocation failure\n");
bio_endio(bio, -ENOMEM);
return;
}
memset(buf, 0, sizeof(*buf));
INIT_LIST_HEAD(&buf->bufs);
buf->stime = jiffies;
buf->bio = bio;
buf->resid = bio->bi_size;
buf->sector = bio->bi_sector;
buf->bv = &bio->bi_io_vec[bio->bi_idx];
buf->bv_resid = buf->bv->bv_len;
WARN_ON(buf->bv_resid == 0);
buf->bv_off = buf->bv->bv_offset;

spin_lock_irqsave(&d->lock, flags);
struct request *rq;

d = q->queuedata;
if ((d->flags & DEVFL_UP) == 0) {
pr_info_ratelimited("aoe: device %ld.%d is not up\n",
d->aoemajor, d->aoeminor);
spin_unlock_irqrestore(&d->lock, flags);
mempool_free(buf, d->bufpool);
bio_endio(bio, -ENXIO);
while ((rq = blk_peek_request(q))) {
blk_start_request(rq);
aoe_end_request(d, rq, 1);
}
return;
}

list_add_tail(&buf->bufs, &d->bufq);

aoecmd_work(d);
__skb_queue_head_init(&queue);
skb_queue_splice_init(&d->sendq, &queue);

spin_unlock_irqrestore(&d->lock, flags);
aoenet_xmit(&queue);
}

static int
@@ -254,34 +208,46 @@ aoeblk_gdalloc(void *vp)
{
struct aoedev *d = vp;
struct gendisk *gd;
enum { KB = 1024, MB = KB * KB, READ_AHEAD = MB, };
mempool_t *mp;
struct request_queue *q;
enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
ulong flags;

gd = alloc_disk(AOE_PARTITIONS);
if (gd == NULL) {
printk(KERN_ERR
"aoe: cannot allocate disk structure for %ld.%d\n",
pr_err("aoe: cannot allocate disk structure for %ld.%d\n",
d->aoemajor, d->aoeminor);
goto err;
}

d->bufpool = mempool_create_slab_pool(MIN_BUFS, buf_pool_cache);
if (d->bufpool == NULL) {
mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab,
buf_pool_cache);
if (mp == NULL) {
printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
d->aoemajor, d->aoeminor);
goto err_disk;
}
q = blk_init_queue(aoeblk_request, &d->lock);
if (q == NULL) {
pr_err("aoe: cannot allocate block queue for %ld.%d\n",
d->aoemajor, d->aoeminor);
mempool_destroy(mp);
goto err_disk;
}

d->blkq = blk_alloc_queue(GFP_KERNEL);
if (!d->blkq)
goto err_mempool;
blk_queue_make_request(d->blkq, aoeblk_make_request);
d->blkq->backing_dev_info.name = "aoe";
if (bdi_init(&d->blkq->backing_dev_info))
goto err_blkq;
spin_lock_irqsave(&d->lock, flags);
blk_queue_max_hw_sectors(d->blkq, BLK_DEF_MAX_SECTORS);
d->blkq->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
d->bufpool = mp;
d->blkq = gd->queue = q;
q->queuedata = d;
d->gd = gd;
gd->major = AOE_MAJOR;
gd->first_minor = d->sysminor * AOE_PARTITIONS;
gd->fops = &aoe_bdops;
@@ -290,8 +256,6 @@ aoeblk_gdalloc(void *vp)
snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
d->aoemajor, d->aoeminor);

gd->queue = d->blkq;
d->gd = gd;
d->flags &= ~DEVFL_GDALLOC;
d->flags |= DEVFL_UP;

1 change: 1 addition & 0 deletions drivers/block/aoe/aoechr.c
@@ -106,6 +106,7 @@ revalidate(const char __user *str, size_t size)
spin_lock_irqsave(&d->lock, flags);
goto loop;
}
aoedev_put(d);
if (skb) {
struct sk_buff_head queue;
__skb_queue_head_init(&queue);
