cfq-iosched: get rid of the need for __GFP_NOFAIL in cfq_find_alloc_queue()

Set up an emergency fallback cfqq that we allocate at IO scheduler init
time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
never fails without having to ensure free memory.

On cfqq lookup, always try to allocate a new cfqq if the given cfq io
context has the oom_cfqq assigned. This ensures that we only temporarily
punt to this shared queue.

Reviewed-by: Jeff Moyer <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
Jens Axboe committed Jul 1, 2009
1 parent d5036d7 commit 6118b70
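The pattern the patch relies on is easy to state outside the kernel: embed one fallback object in the per-device structure so the allocation path always has something to return, and treat "caller is still on the fallback" as a cue to retry a real allocation on the next lookup. Below is a minimal userspace C sketch of that idea; the names (struct sched_data, oom_queue, find_alloc_queue) are hypothetical stand-ins, not the actual cfq-iosched code.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-ins for cfq_queue / cfq_data; not the kernel types. */
struct queue {
	int refcount;
	int is_oom;		/* set only on the embedded fallback queue */
};

struct sched_data {
	struct queue oom_queue;	/* lives inside sched_data, never allocated */
};

/* Init time: the fallback queue always exists, so later lookups cannot fail. */
static void sched_init(struct sched_data *sd)
{
	memset(&sd->oom_queue, 0, sizeof(sd->oom_queue));
	sd->oom_queue.is_oom = 1;
	sd->oom_queue.refcount = 1;	/* permanent reference, never dropped */
}

/*
 * Lookup/alloc: if the caller is currently on the shared fallback queue,
 * retry a real allocation so the fallback is only used temporarily.
 */
static struct queue *find_alloc_queue(struct sched_data *sd, struct queue *cur)
{
	struct queue *q;

	if (cur && !cur->is_oom)
		return cur;		/* already have a private queue */

	q = calloc(1, sizeof(*q));
	if (!q)
		return &sd->oom_queue;	/* OOM: punt to the shared fallback */

	q->refcount = 1;
	return q;
}

int main(void)
{
	struct sched_data sd;
	struct queue *q;

	sched_init(&sd);
	q = find_alloc_queue(&sd, NULL);	/* normally gets a private queue */
	printf("got %s queue\n", q->is_oom ? "fallback" : "private");
	if (!q->is_oom)
		free(q);
	return 0;
}

The permanent reference taken in sched_init() mirrors the atomic_inc(&cfqd->oom_cfqq.ref) in the diff below: reference drops on the fallback must never reach zero, because it was never allocated from the slab cache and must not be freed.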
Showing 1 changed file with 73 additions and 64 deletions: block/cfq-iosched.c
@@ -70,6 +70,51 @@ struct cfq_rb_root {
 };
 #define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, }
 
+/*
+ * Per process-grouping structure
+ */
+struct cfq_queue {
+	/* reference count */
+	atomic_t ref;
+	/* various state flags, see below */
+	unsigned int flags;
+	/* parent cfq_data */
+	struct cfq_data *cfqd;
+	/* service_tree member */
+	struct rb_node rb_node;
+	/* service_tree key */
+	unsigned long rb_key;
+	/* prio tree member */
+	struct rb_node p_node;
+	/* prio tree root we belong to, if any */
+	struct rb_root *p_root;
+	/* sorted list of pending requests */
+	struct rb_root sort_list;
+	/* if fifo isn't expired, next request to serve */
+	struct request *next_rq;
+	/* requests queued in sort_list */
+	int queued[2];
+	/* currently allocated requests */
+	int allocated[2];
+	/* fifo list of requests in sort_list */
+	struct list_head fifo;
+
+	unsigned long slice_end;
+	long slice_resid;
+	unsigned int slice_dispatch;
+
+	/* pending metadata requests */
+	int meta_pending;
+	/* number of requests that are on the dispatch list or inside driver */
+	int dispatched;
+
+	/* io prio of this group */
+	unsigned short ioprio, org_ioprio;
+	unsigned short ioprio_class, org_ioprio_class;
+
+	pid_t pid;
+};
+
 /*
  * Per block device queue structure
  */
@@ -135,51 +180,11 @@ struct cfq_data {
 	unsigned int cfq_slice_idle;
 
 	struct list_head cic_list;
-};
-
-/*
- * Per process-grouping structure
- */
-struct cfq_queue {
-	/* reference count */
-	atomic_t ref;
-	/* various state flags, see below */
-	unsigned int flags;
-	/* parent cfq_data */
-	struct cfq_data *cfqd;
-	/* service_tree member */
-	struct rb_node rb_node;
-	/* service_tree key */
-	unsigned long rb_key;
-	/* prio tree member */
-	struct rb_node p_node;
-	/* prio tree root we belong to, if any */
-	struct rb_root *p_root;
-	/* sorted list of pending requests */
-	struct rb_root sort_list;
-	/* if fifo isn't expired, next request to serve */
-	struct request *next_rq;
-	/* requests queued in sort_list */
-	int queued[2];
-	/* currently allocated requests */
-	int allocated[2];
-	/* fifo list of requests in sort_list */
-	struct list_head fifo;
-
-	unsigned long slice_end;
-	long slice_resid;
-	unsigned int slice_dispatch;
-
-	/* pending metadata requests */
-	int meta_pending;
-	/* number of requests that are on the dispatch list or inside driver */
-	int dispatched;
-
-	/* io prio of this group */
-	unsigned short ioprio, org_ioprio;
-	unsigned short ioprio_class, org_ioprio_class;
 
-	pid_t pid;
+	/*
+	 * Fallback dummy cfqq for extreme OOM conditions
+	 */
+	struct cfq_queue oom_cfqq;
 };
 
 enum cfqq_state_flags {
@@ -1673,41 +1678,40 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
 	/* cic always exists here */
 	cfqq = cic_to_cfqq(cic, is_sync);
 
-	if (!cfqq) {
+	/*
+	 * Always try a new alloc if we fell back to the OOM cfqq
+	 * originally, since it should just be a temporary situation.
+	 */
+	if (!cfqq || cfqq == &cfqd->oom_cfqq) {
+		cfqq = NULL;
 		if (new_cfqq) {
 			cfqq = new_cfqq;
 			new_cfqq = NULL;
 		} else if (gfp_mask & __GFP_WAIT) {
-			/*
-			 * Inform the allocator of the fact that we will
-			 * just repeat this allocation if it fails, to allow
-			 * the allocator to do whatever it needs to attempt to
-			 * free memory.
-			 */
 			spin_unlock_irq(cfqd->queue->queue_lock);
 			new_cfqq = kmem_cache_alloc_node(cfq_pool,
-					gfp_mask | __GFP_NOFAIL | __GFP_ZERO,
+					gfp_mask | __GFP_ZERO,
 					cfqd->queue->node);
 			spin_lock_irq(cfqd->queue->queue_lock);
-			goto retry;
+			if (new_cfqq)
+				goto retry;
 		} else {
 			cfqq = kmem_cache_alloc_node(cfq_pool,
 					gfp_mask | __GFP_ZERO,
 					cfqd->queue->node);
-			if (!cfqq)
-				goto out;
 		}
 
-		cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
-		cfq_init_prio_data(cfqq, ioc);
-		cfq_log_cfqq(cfqd, cfqq, "alloced");
+		if (cfqq) {
+			cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
+			cfq_init_prio_data(cfqq, ioc);
+			cfq_log_cfqq(cfqd, cfqq, "alloced");
+		} else
+			cfqq = &cfqd->oom_cfqq;
 	}
 
 	if (new_cfqq)
 		kmem_cache_free(cfq_pool, new_cfqq);
 
-out:
-	WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
 	return cfqq;
 }
 
@@ -1740,11 +1744,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
 		cfqq = *async_cfqq;
 	}
 
-	if (!cfqq) {
+	if (!cfqq)
 		cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
-		if (!cfqq)
-			return NULL;
-	}
 
 	/*
 	 * pin the queue now that it's allocated, scheduler exit will prune it
@@ -2470,6 +2471,14 @@ static void *cfq_init_queue(struct request_queue *q)
 	for (i = 0; i < CFQ_PRIO_LISTS; i++)
 		cfqd->prio_trees[i] = RB_ROOT;
 
+	/*
+	 * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
+	 * Grab a permanent reference to it, so that the normal code flow
+	 * will not attempt to free it.
+	 */
+	cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
+	atomic_inc(&cfqd->oom_cfqq.ref);
+
 	INIT_LIST_HEAD(&cfqd->cic_list);
 
 	cfqd->queue = q;
