rcu: Rework rcu_barrier() and callback-migration logic
This commit reworks rcu_barrier() and callback-migration logic to
permit rcu_barrier() to run concurrently with CPU-hotplug operations.
The key trick is for callback migration to check whether an
rcu_barrier() is in flight, and, if so, enqueue the ->barrier_head
callback on its behalf.
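
As a rough sketch of that check (illustrative only, not part of the
patch; rcu_seq_state() and rcu_seq_ctr() are the existing
sequence-counter helpers from kernel/rcu/rcu.h), migration needs to
entrain only when the global ->barrier_sequence shows a barrier in
flight that this CPU's ->barrier_seq_snap has not yet caught up with:

        static bool rcu_barrier_entrain_needed(unsigned long gseq, unsigned long lseq)
        {
                /* gseq = READ_ONCE(rcu_state.barrier_sequence); */
                /* lseq = READ_ONCE(rdp->barrier_seq_snap); */
                if (rcu_seq_state(lseq))   /* Snapshot taken mid-barrier: already entrained. */
                        return false;
                if (!rcu_seq_state(gseq))  /* No rcu_barrier() currently in flight. */
                        return false;
                return rcu_seq_ctr(lseq) == rcu_seq_ctr(gseq); /* Snapshot matches this barrier. */
        }

This is simply the inverse of the early return at the top of
rcu_barrier_entrain() in the diff below.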

This commit also adds synchronization with RCU's CPU-hotplug notifiers.
Taken together, these changes will permit a later commit to remove the
cpus_read_lock() and cpus_read_unlock() calls from rcu_barrier().
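
For reference, one rcu_barrier() round under the new scheme, condensed
from the diff below (a sketch of the flow, not verbatim code):

        rcu_barrier():
                rcu_seq_start(&rcu_state.barrier_sequence);
                gseq = rcu_state.barrier_sequence;      /* State bits now nonzero. */
                for_each_possible_cpu(cpu) {
                        if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq)
                                continue;  /* Already entrained, e.g., by migration. */
                        /* Entrain under ->barrier_lock (offline) or via IPI (online). */
                }
                rcu_seq_end(&rcu_state.barrier_sequence);
                gseq = rcu_state.barrier_sequence;      /* State bits cleared again. */
                for_each_possible_cpu(cpu)
                        WRITE_ONCE(rdp->barrier_seq_snap, gseq);  /* Resync for the next round. */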

[ paulmck: Updated per kbuild test robot feedback. ]
[ paulmck: Updated per review session with Neeraj, Frederic, Uladzislau, and Boqun. ]

Signed-off-by: Paul E. McKenney <[email protected]>
paulmckrcu committed Feb 8, 2022
1 parent 0cabb47 commit a16578d
Showing 2 changed files with 63 additions and 16 deletions.
77 changes: 61 additions & 16 deletions kernel/rcu/tree.c
@@ -3987,13 +3987,16 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
 }
 
 /*
- * Called with preemption disabled, and from cross-cpu IRQ context.
+ * If needed, entrain an rcu_barrier() callback on rdp->cblist.
  */
-static void rcu_barrier_func(void *cpu_in)
+static void rcu_barrier_entrain(struct rcu_data *rdp)
 {
-        uintptr_t cpu = (uintptr_t)cpu_in;
-        struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+        unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
+        unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
 
+        lockdep_assert_held(&rdp->barrier_lock);
+        if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
+                return;
         rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
         rdp->barrier_head.func = rcu_barrier_callback;
         debug_rcu_head_queue(&rdp->barrier_head);
@@ -4003,10 +4006,26 @@ static void rcu_barrier_func(void *cpu_in)
                 atomic_inc(&rcu_state.barrier_cpu_count);
         } else {
                 debug_rcu_head_unqueue(&rdp->barrier_head);
-                rcu_barrier_trace(TPS("IRQNQ"), -1,
-                                  rcu_state.barrier_sequence);
+                rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
         }
         rcu_nocb_unlock(rdp);
+        smp_store_release(&rdp->barrier_seq_snap, gseq);
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_handler(void *cpu_in)
+{
+        uintptr_t cpu = (uintptr_t)cpu_in;
+        struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+        lockdep_assert_irqs_disabled();
+        WARN_ON_ONCE(cpu != rdp->cpu);
+        WARN_ON_ONCE(cpu != smp_processor_id());
+        raw_spin_lock(&rdp->barrier_lock);
+        rcu_barrier_entrain(rdp);
+        raw_spin_unlock(&rdp->barrier_lock);
 }
 
 /**
@@ -4020,6 +4039,8 @@ static void rcu_barrier_func(void *cpu_in)
 void rcu_barrier(void)
 {
         uintptr_t cpu;
+        unsigned long flags;
+        unsigned long gseq;
         struct rcu_data *rdp;
         unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);
 
@@ -4038,6 +4059,7 @@ void rcu_barrier(void)
 
         /* Mark the start of the barrier operation. */
         rcu_seq_start(&rcu_state.barrier_sequence);
+        gseq = rcu_state.barrier_sequence;
         rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);
 
         /*
@@ -4058,19 +4080,30 @@ void rcu_barrier(void)
          */
         for_each_possible_cpu(cpu) {
                 rdp = per_cpu_ptr(&rcu_data, cpu);
+retry:
+                if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq)
+                        continue;
+                raw_spin_lock_irqsave(&rdp->barrier_lock, flags);
                 if (!rcu_segcblist_n_cbs(&rdp->cblist)) {
+                        WRITE_ONCE(rdp->barrier_seq_snap, gseq);
+                        raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
                         rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence);
                         continue;
                 }
-                if (cpu_online(cpu)) {
-                        rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence);
-                        smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1);
-                } else {
+                if (!rcu_rdp_cpu_online(rdp)) {
+                        rcu_barrier_entrain(rdp);
+                        WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
+                        raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
                         rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence);
-                        local_irq_disable();
-                        rcu_barrier_func((void *)cpu);
-                        local_irq_enable();
+                        continue;
                 }
+                raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
+                if (smp_call_function_single(cpu, rcu_barrier_handler, (void *)cpu, 1)) {
+                        schedule_timeout_uninterruptible(1);
+                        goto retry;
+                }
+                WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
+                rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence);
         }
         cpus_read_unlock();
 
@@ -4087,6 +4120,12 @@ void rcu_barrier(void)
         /* Mark the end of the barrier operation. */
         rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence);
         rcu_seq_end(&rcu_state.barrier_sequence);
+        gseq = rcu_state.barrier_sequence;
+        for_each_possible_cpu(cpu) {
+                rdp = per_cpu_ptr(&rcu_data, cpu);
+
+                WRITE_ONCE(rdp->barrier_seq_snap, gseq);
+        }
 
         /* Other rcu_barrier() invocations can now safely proceed. */
         mutex_unlock(&rcu_state.barrier_mutex);
@@ -4134,6 +4173,8 @@ rcu_boot_init_percpu_data(int cpu)
         INIT_WORK(&rdp->strict_work, strict_work_handler);
         WARN_ON_ONCE(rdp->dynticks_nesting != 1);
         WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
+        raw_spin_lock_init(&rdp->barrier_lock);
+        rdp->barrier_seq_snap = rcu_state.barrier_sequence;
         rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
         rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
         rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
@@ -4284,8 +4325,10 @@ void rcu_cpu_starting(unsigned int cpu)
         local_irq_save(flags);
         arch_spin_lock(&rcu_state.ofl_lock);
         rcu_dynticks_eqs_online();
+        raw_spin_lock(&rdp->barrier_lock);
         raw_spin_lock_rcu_node(rnp);
         WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
+        raw_spin_unlock(&rdp->barrier_lock);
         newcpu = !(rnp->expmaskinitnext & mask);
         rnp->expmaskinitnext |= mask;
         /* Allow lockless access for expedited grace periods. */
@@ -4372,7 +4415,9 @@ void rcutree_migrate_callbacks(int cpu)
             rcu_segcblist_empty(&rdp->cblist))
                 return;  /* No callbacks to migrate. */
 
-        local_irq_save(flags);
+        raw_spin_lock_irqsave(&rdp->barrier_lock, flags);
+        WARN_ON_ONCE(rcu_rdp_cpu_online(rdp));
+        rcu_barrier_entrain(rdp);
         my_rdp = this_cpu_ptr(&rcu_data);
         my_rnp = my_rdp->mynode;
         rcu_nocb_lock(my_rdp); /* irqs already disabled. */
@@ -4382,10 +4427,10 @@ void rcutree_migrate_callbacks(int cpu)
         needwake = rcu_advance_cbs(my_rnp, rdp) ||
                    rcu_advance_cbs(my_rnp, my_rdp);
         rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
+        raw_spin_unlock(&rdp->barrier_lock); /* irqs remain disabled. */
         needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp);
         rcu_segcblist_disable(&rdp->cblist);
-        WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
-                     !rcu_segcblist_n_cbs(&my_rdp->cblist));
+        WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != !rcu_segcblist_n_cbs(&my_rdp->cblist));
         if (rcu_rdp_is_offloaded(my_rdp)) {
                 raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */
                 __call_rcu_nocb_wake(my_rdp, true, flags);
2 changes: 2 additions & 0 deletions kernel/rcu/tree.h
@@ -188,6 +188,8 @@ struct rcu_data {
         bool rcu_forced_tick_exp;       /*  ... provide QS to expedited GP. */
 
         /* 4) rcu_barrier(), OOM callbacks, and expediting. */
+        raw_spinlock_t barrier_lock;    /* Protects ->barrier_seq_snap. */
+        unsigned long barrier_seq_snap; /* Snap of rcu_state.barrier_sequence. */
         struct rcu_head barrier_head;
         int exp_dynticks_snap;          /* Double-check need for IPI. */