Skip to content

Commit

Permalink
rcu: Add callback-free CPUs
Browse files Browse the repository at this point in the history
RCU callback execution can add significant OS jitter and also can
degrade both scheduling latency and, in asymmetric multiprocessors,
energy efficiency.  This commit therefore adds the ability for selected
CPUs ("rcu_nocbs=" boot parameter) to have their callbacks offloaded
to kthreads.  If the "rcu_nocb_poll" boot parameter is also specified,
these kthreads will do polling, removing the need for the offloaded
CPUs to do wakeups.  At least one CPU must be doing normal callback
processing: currently CPU 0 cannot be selected as a no-CBs CPU.
In addition, attempts to offline the last normal-CBs CPU will fail.

This feature was inspired by Jim Houston's and Joe Korty's JRCU, and
this commit includes fixes to problems located by Fengguang Wu's
kbuild test robot.

[ paulmck: Added gfp.h include file as suggested by Fengguang Wu. ]

Signed-off-by: Paul E. McKenney <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
  • Loading branch information
Paul E. McKenney authored and paulmck committed Nov 16, 2012
1 parent aac1cda commit 3fbfbf7
Show file tree
Hide file tree
Showing 7 changed files with 542 additions and 16 deletions.
21 changes: 21 additions & 0 deletions Documentation/kernel-parameters.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2394,6 +2394,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
ramdisk_size= [RAM] Sizes of RAM disks in kilobytes
See Documentation/blockdev/ramdisk.txt.

rcu_nocbs= [KNL,BOOT]
In kernels built with CONFIG_RCU_NOCB_CPU=y, set
the specified list of CPUs to be no-callback CPUs.
Invocation of these CPUs' RCU callbacks will
be offloaded to "rcuoN" kthreads created for
that purpose. This reduces OS jitter on the
offloaded CPUs, which can be useful for HPC and
real-time workloads. It can also improve energy
efficiency for asymmetric multiprocessors.

rcu_nocbs_poll [KNL,BOOT]
Rather than requiring that offloaded CPUs
(specified by rcu_nocbs= above) explicitly
awaken the corresponding "rcuoN" kthreads,
make these kthreads poll for callbacks.
This improves the real-time response for the
offloaded CPUs by relieving them of the need to
wake up the corresponding kthread, but degrades
energy efficiency by requiring that the kthreads
periodically wake up to do the polling.

rcutree.blimit= [KNL,BOOT]
Set maximum number of finished RCU callbacks to process
in one batch.
Expand Down
1 change: 1 addition & 0 deletions include/trace/events/rcu.h
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,7 @@ TRACE_EVENT(rcu_torture_read,
* "EarlyExit": rcu_barrier_callback() piggybacked, thus early exit.
* "Inc1": rcu_barrier_callback() piggyback check counter incremented.
* "Offline": rcu_barrier_callback() found offline CPU
* "OnlineNoCB": rcu_barrier_callback() found online no-CBs CPU.
* "OnlineQ": rcu_barrier_callback() found online CPU with callbacks.
* "OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks.
* "IRQ": An rcu_barrier_callback() callback posted on remote CPU.
Expand Down
22 changes: 22 additions & 0 deletions init/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,28 @@ config RCU_BOOST_DELAY

Accept the default if unsure.

config RCU_NOCB_CPU
bool "Offload RCU callback processing from boot-selected CPUs"
depends on TREE_RCU || TREE_PREEMPT_RCU
default n
help
Use this option to reduce OS jitter for aggressive HPC or
real-time workloads. It can also be used to offload RCU
callback invocation to energy-efficient CPUs in battery-powered
asymmetric multiprocessors.

This option offloads callback invocation from the set of
CPUs specified at boot time by the rcu_nocbs parameter.
For each such CPU, a kthread ("rcuoN") will be created to
invoke callbacks, where the "N" is the CPU being offloaded.
Nothing prevents this kthread from running on the specified
CPUs, but (1) the kthreads may be preempted between each
callback, and (2) affinity or cgroups can be used to force
the kthreads to run on whatever set of CPUs is desired.

Say Y here if you want reduced OS jitter on selected CPUs.
Say N here if you are unsure.

endmenu # "RCU Subsystem"

config IKCONFIG
Expand Down
63 changes: 51 additions & 12 deletions kernel/rcutree.c
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,8 @@ EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
static int
cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
{
return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL];
return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
rdp->nxttail[RCU_DONE_TAIL] != NULL;
}

/*
Expand All @@ -312,8 +313,11 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
static int
cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
return *rdp->nxttail[RCU_DONE_TAIL +
(ACCESS_ONCE(rsp->completed) != rdp->completed)] &&
struct rcu_head **ntp;

ntp = rdp->nxttail[RCU_DONE_TAIL +
(ACCESS_ONCE(rsp->completed) != rdp->completed)];
return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp &&
!rcu_gp_in_progress(rsp);
}

Expand Down Expand Up @@ -1123,6 +1127,7 @@ static void init_callback_list(struct rcu_data *rdp)
rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
init_nocb_callback_list(rdp);
}

/*
Expand Down Expand Up @@ -1633,6 +1638,10 @@ static void
rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
struct rcu_node *rnp, struct rcu_data *rdp)
{
/* No-CBs CPUs do not have orphanable callbacks. */
if (is_nocb_cpu(rdp->cpu))
return;

/*
* Orphan the callbacks. First adjust the counts. This is safe
* because _rcu_barrier() excludes CPU-hotplug operations, so it
Expand Down Expand Up @@ -1684,6 +1693,10 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
int i;
struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);

/* No-CBs CPUs are handled specially. */
if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
return;

/* Do the accounting first. */
rdp->qlen_lazy += rsp->qlen_lazy;
rdp->qlen += rsp->qlen;
Expand Down Expand Up @@ -2162,9 +2175,15 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
}
}

/*
* Helper function for call_rcu() and friends. The cpu argument will
* normally be -1, indicating "currently running CPU". It may specify
* a CPU only if that CPU is a no-CBs CPU. Currently, only _rcu_barrier()
* is expected to specify a CPU.
*/
static void
__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
struct rcu_state *rsp, bool lazy)
struct rcu_state *rsp, int cpu, bool lazy)
{
unsigned long flags;
struct rcu_data *rdp;
Expand All @@ -2184,9 +2203,14 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
rdp = this_cpu_ptr(rsp->rda);

/* Add the callback to our list. */
if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) {
if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
int offline;

if (cpu != -1)
rdp = per_cpu_ptr(rsp->rda, cpu);
offline = !__call_rcu_nocb(rdp, head, lazy);
WARN_ON_ONCE(offline);
/* _call_rcu() is illegal on offline CPU; leak the callback. */
WARN_ON_ONCE(1);
local_irq_restore(flags);
return;
}
Expand Down Expand Up @@ -2215,7 +2239,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
*/
void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_sched_state, 0);
__call_rcu(head, func, &rcu_sched_state, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu_sched);

Expand All @@ -2224,7 +2248,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
*/
void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_bh_state, 0);
__call_rcu(head, func, &rcu_bh_state, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);

Expand Down Expand Up @@ -2676,9 +2700,17 @@ static void _rcu_barrier(struct rcu_state *rsp)
* When that callback is invoked, we will know that all of the
* corresponding CPU's preceding callbacks have been invoked.
*/
for_each_online_cpu(cpu) {
for_each_possible_cpu(cpu) {
if (!cpu_online(cpu) && !is_nocb_cpu(cpu))
continue;
rdp = per_cpu_ptr(rsp->rda, cpu);
if (ACCESS_ONCE(rdp->qlen)) {
if (is_nocb_cpu(cpu)) {
_rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
rsp->n_barrier_done);
atomic_inc(&rsp->barrier_cpu_count);
__call_rcu(&rdp->barrier_head, rcu_barrier_callback,
rsp, cpu, 0);
} else if (ACCESS_ONCE(rdp->qlen)) {
_rcu_barrier_trace(rsp, "OnlineQ", cpu,
rsp->n_barrier_done);
smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
Expand Down Expand Up @@ -2752,6 +2784,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
#endif
rdp->cpu = cpu;
rdp->rsp = rsp;
rcu_boot_init_nocb_percpu_data(rdp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}

Expand Down Expand Up @@ -2833,6 +2866,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
struct rcu_node *rnp = rdp->mynode;
struct rcu_state *rsp;
int ret = NOTIFY_OK;

trace_rcu_utilization("Start CPU hotplug");
switch (action) {
Expand All @@ -2846,7 +2880,10 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
rcu_boost_kthread_setaffinity(rnp, -1);
break;
case CPU_DOWN_PREPARE:
rcu_boost_kthread_setaffinity(rnp, cpu);
if (nocb_cpu_expendable(cpu))
rcu_boost_kthread_setaffinity(rnp, cpu);
else
ret = NOTIFY_BAD;
break;
case CPU_DYING:
case CPU_DYING_FROZEN:
Expand All @@ -2870,7 +2907,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
break;
}
trace_rcu_utilization("End CPU hotplug");
return NOTIFY_OK;
return ret;
}

/*
Expand All @@ -2890,6 +2927,7 @@ static int __init rcu_spawn_gp_kthread(void)
raw_spin_lock_irqsave(&rnp->lock, flags);
rsp->gp_kthread = t;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
rcu_spawn_nocb_kthreads(rsp);
}
return 0;
}
Expand Down Expand Up @@ -3085,6 +3123,7 @@ void __init rcu_init(void)
rcu_init_one(&rcu_sched_state, &rcu_sched_data);
rcu_init_one(&rcu_bh_state, &rcu_bh_data);
__rcu_init_preempt();
rcu_init_nocb();
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);

/*
Expand Down
47 changes: 47 additions & 0 deletions kernel/rcutree.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,18 @@ struct rcu_data {
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */

/* 7) Callback offloading. */
#ifdef CONFIG_RCU_NOCB_CPU
struct rcu_head *nocb_head; /* CBs waiting for kthread. */
struct rcu_head **nocb_tail;
atomic_long_t nocb_q_count; /* # CBs waiting for kthread */
atomic_long_t nocb_q_count_lazy; /* (approximate). */
int nocb_p_count; /* # CBs being invoked by kthread */
int nocb_p_count_lazy; /* (approximate). */
wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
struct task_struct *nocb_kthread;
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */

int cpu;
struct rcu_state *rsp;
};
Expand Down Expand Up @@ -369,6 +381,12 @@ struct rcu_state {
struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */
void (*call)(struct rcu_head *head, /* call_rcu() flavor. */
void (*func)(struct rcu_head *head));
#ifdef CONFIG_RCU_NOCB_CPU
void (*call_remote)(struct rcu_head *head,
void (*func)(struct rcu_head *head));
/* call_rcu() flavor, but for */
/* placing on remote CPU. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */

/* The following fields are guarded by the root rcu_node's lock. */

Expand Down Expand Up @@ -439,6 +457,8 @@ struct rcu_state {
#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */

extern struct list_head rcu_struct_flavors;

/* Sequence through rcu_state structures for each RCU flavor. */
#define for_each_rcu_flavor(rsp) \
list_for_each_entry((rsp), &rcu_struct_flavors, flavors)

Expand Down Expand Up @@ -515,5 +535,32 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
static void print_cpu_stall_info_end(void);
static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static void increment_cpu_stall_ticks(void);
static bool is_nocb_cpu(int cpu);
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
bool lazy);
static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
struct rcu_data *rdp);
static bool nocb_cpu_expendable(int cpu);
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
static void init_nocb_callback_list(struct rcu_data *rdp);
static void __init rcu_init_nocb(void);

#endif /* #ifndef RCU_TREE_NONCORE */

#ifdef CONFIG_RCU_TRACE
#ifdef CONFIG_RCU_NOCB_CPU
/* Sum up queue lengths for tracing. */
static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
{
*ql = atomic_long_read(&rdp->nocb_q_count) + rdp->nocb_p_count;
*qll = atomic_long_read(&rdp->nocb_q_count_lazy) + rdp->nocb_p_count_lazy;
}
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
{
*ql = 0;
*qll = 0;
}
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
#endif /* #ifdef CONFIG_RCU_TRACE */
Loading

0 comments on commit 3fbfbf7

Please sign in to comment.