rcu: Avoid waking up CPUs having only kfree_rcu() callbacks
When CONFIG_RCU_FAST_NO_HZ is enabled, RCU will allow a given CPU to
enter dyntick-idle mode even if it still has RCU callbacks queued.
RCU avoids system hangs in this case by scheduling a timer for several
jiffies in the future.  However, if all of the callbacks on that CPU
are from kfree_rcu(), there is no reason to wake the CPU up, as it is
not a problem to defer freeing of memory.

This commit therefore tracks the number of callbacks on a given CPU
that are from kfree_rcu(), and avoids scheduling the timer if all of
a given CPU's callbacks are from kfree_rcu().
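
The bookkeeping is small: each CPU keeps a count of lazy (kfree_rcu()) callbacks alongside its total callback count, and a wakeup is needed only when the two differ. A minimal user-space sketch of that idea, with simplified types; the struct and helper names below are illustrative, not the kernel's API:

	#include <stdbool.h>

	struct cpu_cbs {
		long qlen_lazy;	/* callbacks that merely kfree() memory */
		long qlen;	/* all queued callbacks, including lazy ones */
	};

	static void enqueue_callback(struct cpu_cbs *c, bool lazy)
	{
		c->qlen++;
		if (lazy)
			c->qlen_lazy++;	/* kfree_rcu() path: freeing can wait */
	}

	/* Wake the CPU out of dyntick-idle only for non-lazy callbacks. */
	static bool needs_wakeup_timer(const struct cpu_cbs *c)
	{
		return c->qlen != c->qlen_lazy;
	}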

Signed-off-by: Paul E. McKenney <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
Paul E. McKenney authored and paulmck committed Feb 21, 2012
1 parent 0bb7b59 commit 486e259
Showing 10 changed files with 153 additions and 47 deletions.
2 changes: 1 addition & 1 deletion include/linux/rcupdate.h
@@ -841,7 +841,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
 	/* See the kfree_rcu() header comment. */
 	BUILD_BUG_ON(!__is_kfree_rcu_offset(offset));
 
-	call_rcu(head, (rcu_callback)offset);
+	kfree_call_rcu(head, (rcu_callback)offset);
 }
 
 /**
6 changes: 6 additions & 0 deletions include/linux/rcutiny.h
@@ -83,6 +83,12 @@ static inline void synchronize_sched_expedited(void)
 	synchronize_sched();
 }
 
+static inline void kfree_call_rcu(struct rcu_head *head,
+				  void (*func)(struct rcu_head *rcu))
+{
+	call_rcu(head, func);
+}
+
 #ifdef CONFIG_TINY_RCU
 
 static inline void rcu_preempt_note_context_switch(void)
2 changes: 2 additions & 0 deletions include/linux/rcutree.h
@@ -61,6 +61,8 @@ extern void synchronize_rcu_bh(void);
 extern void synchronize_sched_expedited(void);
 extern void synchronize_rcu_expedited(void);
 
+void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
+
 static inline void synchronize_rcu_bh_expedited(void)
 {
 	synchronize_sched_expedited();
63 changes: 39 additions & 24 deletions include/trace/events/rcu.h
@@ -313,94 +313,104 @@ TRACE_EVENT(rcu_prep_idle,
 /*
  * Tracepoint for the registration of a single RCU callback function.
  * The first argument is the type of RCU, the second argument is
- * a pointer to the RCU callback itself, and the third element is the
- * new RCU callback queue length for the current CPU.
+ * a pointer to the RCU callback itself, the third element is the
+ * number of lazy callbacks queued, and the fourth element is the
+ * total number of callbacks queued.
  */
 TRACE_EVENT(rcu_callback,
 
-	TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen),
+	TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen_lazy,
+		 long qlen),
 
-	TP_ARGS(rcuname, rhp, qlen),
+	TP_ARGS(rcuname, rhp, qlen_lazy, qlen),
 
 	TP_STRUCT__entry(
 		__field(char *, rcuname)
 		__field(void *, rhp)
 		__field(void *, func)
+		__field(long, qlen_lazy)
 		__field(long, qlen)
 	),
 
 	TP_fast_assign(
 		__entry->rcuname = rcuname;
 		__entry->rhp = rhp;
 		__entry->func = rhp->func;
+		__entry->qlen_lazy = qlen_lazy;
 		__entry->qlen = qlen;
 	),
 
-	TP_printk("%s rhp=%p func=%pf %ld",
-		  __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen)
+	TP_printk("%s rhp=%p func=%pf %ld/%ld",
+		  __entry->rcuname, __entry->rhp, __entry->func,
+		  __entry->qlen_lazy, __entry->qlen)
 );
 
 /*
  * Tracepoint for the registration of a single RCU callback of the special
  * kfree() form.  The first argument is the RCU type, the second argument
  * is a pointer to the RCU callback, the third argument is the offset
  * of the callback within the enclosing RCU-protected data structure,
- * and the fourth argument is the new RCU callback queue length for the
- * current CPU.
+ * the fourth argument is the number of lazy callbacks queued, and the
+ * fifth argument is the total number of callbacks queued.
  */
 TRACE_EVENT(rcu_kfree_callback,
 
 	TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset,
-		 long qlen),
+		 long qlen_lazy, long qlen),
 
-	TP_ARGS(rcuname, rhp, offset, qlen),
+	TP_ARGS(rcuname, rhp, offset, qlen_lazy, qlen),
 
 	TP_STRUCT__entry(
 		__field(char *, rcuname)
		__field(void *, rhp)
 		__field(unsigned long, offset)
+		__field(long, qlen_lazy)
 		__field(long, qlen)
 	),
 
 	TP_fast_assign(
 		__entry->rcuname = rcuname;
 		__entry->rhp = rhp;
 		__entry->offset = offset;
+		__entry->qlen_lazy = qlen_lazy;
 		__entry->qlen = qlen;
 	),
 
-	TP_printk("%s rhp=%p func=%ld %ld",
+	TP_printk("%s rhp=%p func=%ld %ld/%ld",
 		  __entry->rcuname, __entry->rhp, __entry->offset,
-		  __entry->qlen)
+		  __entry->qlen_lazy, __entry->qlen)
 );
 
 /*
  * Tracepoint for marking the beginning rcu_do_batch, performed to start
  * RCU callback invocation.  The first argument is the RCU flavor,
- * the second is the total number of callbacks (including those that
- * are not yet ready to be invoked), and the third argument is the
- * current RCU-callback batch limit.
+ * the second is the number of lazy callbacks queued, the third is
+ * the total number of callbacks queued, and the fourth argument is
+ * the current RCU-callback batch limit.
 */
 TRACE_EVENT(rcu_batch_start,
 
-	TP_PROTO(char *rcuname, long qlen, int blimit),
+	TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit),
 
-	TP_ARGS(rcuname, qlen, blimit),
+	TP_ARGS(rcuname, qlen_lazy, qlen, blimit),
 
 	TP_STRUCT__entry(
 		__field(char *, rcuname)
+		__field(long, qlen_lazy)
 		__field(long, qlen)
 		__field(int, blimit)
 	),
 
 	TP_fast_assign(
 		__entry->rcuname = rcuname;
+		__entry->qlen_lazy = qlen_lazy;
 		__entry->qlen = qlen;
 		__entry->blimit = blimit;
 	),
 
-	TP_printk("%s CBs=%ld bl=%d",
-		  __entry->rcuname, __entry->qlen, __entry->blimit)
+	TP_printk("%s CBs=%ld/%ld bl=%d",
+		  __entry->rcuname, __entry->qlen_lazy, __entry->qlen,
+		  __entry->blimit)
 );
 
 /*
@@ -531,16 +541,21 @@ TRACE_EVENT(rcu_torture_read,
 #else /* #ifdef CONFIG_RCU_TRACE */
 
 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
-#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0)
+#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
+				    qsmask) do { } while (0)
 #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
-#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0)
+#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
+					 grplo, grphi, gp_tasks) do { } \
+	while (0)
 #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
 #define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0)
 #define trace_rcu_prep_idle(reason) do { } while (0)
-#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0)
-#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0)
-#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0)
+#define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0)
+#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \
+	do { } while (0)
+#define trace_rcu_batch_start(rcuname, qlen_lazy, qlen, blimit) \
+	do { } while (0)
 #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0)
 #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
 #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
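
With these changes, every qlen in the trace output becomes a lazy/total pair, so a reader can tell at a glance whether a CPU's backlog is dominated by kfree_rcu() callbacks. A hypothetical rcu_batch_start payload, assuming 3 of 10 queued callbacks are lazy and a batch limit of 10:

	rcu_sched CBs=3/10 bl=10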
4 changes: 3 additions & 1 deletion kernel/rcu.h
@@ -76,16 +76,18 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
 
 extern void kfree(const void *);
 
-static inline void __rcu_reclaim(char *rn, struct rcu_head *head)
+static inline bool __rcu_reclaim(char *rn, struct rcu_head *head)
 {
 	unsigned long offset = (unsigned long)head->func;
 
 	if (__is_kfree_rcu_offset(offset)) {
 		RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
 		kfree((void *)head - offset);
+		return 1;
 	} else {
 		RCU_TRACE(trace_rcu_invoke_callback(rn, head));
 		head->func(head);
+		return 0;
 	}
 }
 
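
The classification in __rcu_reclaim works because kfree_rcu() never stores a real function pointer: __kfree_rcu() stores the offset of the rcu_head within its enclosing structure in head->func, and an offset small enough to fall below the first page cannot be a valid function address. A hedged user-space sketch of the same trick, assuming the kernel's test is a simple "offset < 4096" comparison:

	#include <stdbool.h>
	#include <stdlib.h>

	struct rcu_head {
		struct rcu_head *next;
		void (*func)(struct rcu_head *);
	};

	/* Tiny "function pointers" are really offsets (cf. __is_kfree_rcu_offset). */
	static bool is_kfree_offset(unsigned long offset)
	{
		return offset < 4096;
	}

	/* Mirrors __rcu_reclaim: true means the callback was a lazy kfree. */
	static bool reclaim(struct rcu_head *head)
	{
		unsigned long offset = (unsigned long)head->func;

		if (is_kfree_offset(offset)) {
			free((char *)head - offset);	/* kfree((void *)head - offset) */
			return true;
		}
		head->func(head);
		return false;
	}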
4 changes: 2 additions & 2 deletions kernel/rcutiny.c
@@ -258,7 +258,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 
 	/* If no RCU callbacks ready to invoke, just return. */
 	if (&rcp->rcucblist == rcp->donetail) {
-		RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1));
+		RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1));
 		RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
 					      ACCESS_ONCE(rcp->rcucblist),
 					      need_resched(),
@@ -269,7 +269,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 
 	/* Move the ready-to-invoke callbacks to a local list. */
 	local_irq_save(flags);
-	RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1));
+	RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
 	list = rcp->rcucblist;
 	rcp->rcucblist = *rcp->donetail;
 	*rcp->donetail = NULL;
29 changes: 18 additions & 11 deletions kernel/rcutree.c
@@ -1261,13 +1261,15 @@ static void rcu_send_cbs_to_online(struct rcu_state *rsp)
 
 	*receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
 	receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+	receive_rdp->qlen_lazy += rdp->qlen_lazy;
 	receive_rdp->qlen += rdp->qlen;
 	receive_rdp->n_cbs_adopted += rdp->qlen;
 	rdp->n_cbs_orphaned += rdp->qlen;
 
 	rdp->nxtlist = NULL;
 	for (i = 0; i < RCU_NEXT_SIZE; i++)
 		rdp->nxttail[i] = &rdp->nxtlist;
+	rdp->qlen_lazy = 0;
 	rdp->qlen = 0;
 }
 
@@ -1368,11 +1370,11 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 {
 	unsigned long flags;
 	struct rcu_head *next, *list, **tail;
-	int bl, count;
+	int bl, count, count_lazy;
 
 	/* If no callbacks are ready, just return.*/
 	if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
-		trace_rcu_batch_start(rsp->name, 0, 0);
+		trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
 		trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
 				    need_resched(), is_idle_task(current),
 				    rcu_is_callbacks_kthread());
@@ -1385,7 +1387,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 	 */
 	local_irq_save(flags);
 	bl = rdp->blimit;
-	trace_rcu_batch_start(rsp->name, rdp->qlen, bl);
+	trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
 	list = rdp->nxtlist;
 	rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
 	*rdp->nxttail[RCU_DONE_TAIL] = NULL;
@@ -1396,12 +1398,13 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 	local_irq_restore(flags);
 
 	/* Invoke callbacks. */
-	count = 0;
+	count = count_lazy = 0;
 	while (list) {
 		next = list->next;
 		prefetch(next);
 		debug_rcu_head_unqueue(list);
-		__rcu_reclaim(rsp->name, list);
+		if (__rcu_reclaim(rsp->name, list))
+			count_lazy++;
 		list = next;
 		/* Stop only if limit reached and CPU has something to do. */
 		if (++count >= bl &&
@@ -1416,6 +1419,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 			    rcu_is_callbacks_kthread());
 
 	/* Update count, and requeue any remaining callbacks. */
+	rdp->qlen_lazy -= count_lazy;
 	rdp->qlen -= count;
 	rdp->n_cbs_invoked += count;
 	if (list != NULL) {
@@ -1702,7 +1706,7 @@ static void invoke_rcu_core(void)
 
 static void
 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
-	   struct rcu_state *rsp)
+	   struct rcu_state *rsp, bool lazy)
 {
 	unsigned long flags;
 	struct rcu_data *rdp;
@@ -1727,12 +1731,14 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	*rdp->nxttail[RCU_NEXT_TAIL] = head;
 	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
 	rdp->qlen++;
+	if (lazy)
+		rdp->qlen_lazy++;
 
 	if (__is_kfree_rcu_offset((unsigned long)func))
 		trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
-					 rdp->qlen);
+					 rdp->qlen_lazy, rdp->qlen);
 	else
-		trace_rcu_callback(rsp->name, head, rdp->qlen);
+		trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
 
 	/* If interrupts were disabled, don't dive into RCU core. */
 	if (irqs_disabled_flags(flags)) {
@@ -1779,16 +1785,16 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
  */
 void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
-	__call_rcu(head, func, &rcu_sched_state);
+	__call_rcu(head, func, &rcu_sched_state, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
 /*
- * Queue an RCU for invocation after a quicker grace period.
+ * Queue an RCU callback for invocation after a quicker grace period.
  */
 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
-	__call_rcu(head, func, &rcu_bh_state);
+	__call_rcu(head, func, &rcu_bh_state, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu_bh);
 
@@ -2036,6 +2042,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->nxtlist = NULL;
 	for (i = 0; i < RCU_NEXT_SIZE; i++)
 		rdp->nxttail[i] = &rdp->nxtlist;
+	rdp->qlen_lazy = 0;
 	rdp->qlen = 0;
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
 	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING);
3 changes: 2 additions & 1 deletion kernel/rcutree.h
@@ -265,7 +265,8 @@ struct rcu_data {
 	 */
 	struct rcu_head *nxtlist;
 	struct rcu_head **nxttail[RCU_NEXT_SIZE];
-	long qlen;			/* # of queued callbacks */
+	long qlen_lazy;			/* # of lazy queued callbacks */
+	long qlen;			/* # of queued callbacks, incl lazy */
 	long qlen_last_fqs_check;
 					/* qlen at last check for QS forcing */
 	unsigned long n_cbs_invoked;	/* count of RCU cbs invoked. */
(The remaining two changed files in this commit are not rendered on this page.)
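
Those unrendered hunks presumably carry the CONFIG_RCU_FAST_NO_HZ decision the commit message describes. A hedged sketch of that decision in terms of the fields this patch adds to struct rcu_data (the helper name is illustrative, not necessarily the one the commit uses):

	/* Skip the dyntick-idle wakeup timer when every queued callback is lazy. */
	static bool rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp)
	{
		return rdp->qlen != rdp->qlen_lazy;
	}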
