Merge tag 'rcu.2022.03.13a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu

Pull RCU updates from Paul McKenney:

 - Fix idle detection (Neeraj Upadhyay) and missing access marking
   detected by KCSAN.

 - Reduce coupling between rcu_barrier() and CPU-hotplug operations, so
   that rcu_barrier() no longer needs to do cpus_read_lock(). This may
   also someday allow system boot to bring CPUs online concurrently (a
   caller-side sketch of the rcu_barrier() guarantee follows this list).

 - Enable more aggressive movement to per-CPU queueing when reacting to
   excessive lock contention due to workloads placing heavy update-side
   stress on RCU tasks.

 - Improvements to RCU priority boosting, including changes from Neeraj
   Upadhyay, Zqiang, and Alison Chaiken.

 - Various fixes improving test robustness and debug information.

 - Add tests for SRCU size transitions, further compress torture.sh
   build products, and improve debug output.

 - Miscellaneous fixes.
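
For readers less familiar with the API, here is a minimal caller-side sketch of the guarantee that the rcu_barrier() rework preserves (illustrative only; the foo structure and functions are hypothetical, not from this series): rcu_barrier() waits until every previously queued call_rcu() callback has been invoked, which is why module-exit paths call it before their callback code can disappear.

#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
        int data;
        struct rcu_head rcu;
};

static void foo_reclaim(struct rcu_head *head)
{
        kfree(container_of(head, struct foo, rcu));
}

static void foo_release(struct foo *fp)
{
        /* Defer the free until a full grace period has elapsed. */
        call_rcu(&fp->rcu, foo_reclaim);
}

static void __exit foo_exit(void)
{
        /* Wait for all outstanding foo_reclaim() invocations to finish. */
        rcu_barrier();
}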

* tag 'rcu.2022.03.13a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu: (49 commits)
  rcu: Replace cpumask_weight with cpumask_empty where appropriate
  rcu: Remove __read_mostly annotations from rcu_scheduler_active externs
  rcu: Uninline multi-use function: finish_rcuwait()
  rcu: Mark writes to the rcu_segcblist structure's ->flags field
  kasan: Record work creation stack trace with interrupts enabled
  rcu: Inline __call_rcu() into call_rcu()
  rcu: Add mutex for rcu boost kthread spawning and affinity setting
  rcu: Fix description of kvfree_rcu()
  MAINTAINERS:  Add Frederic and Neeraj to their RCU files
  rcutorture: Provide non-power-of-two Tasks RCU scenarios
  rcutorture: Test SRCU size transitions
  torture: Make torture.sh help message match reality
  rcu-tasks: Set ->percpu_enqueue_shift to zero upon contention
  rcu-tasks: Use order_base_2() instead of ilog2()
  rcu: Create and use an rcu_rdp_cpu_online()
  rcu: Make rcu_barrier() no longer block CPU-hotplug operations
  rcu: Rework rcu_barrier() and callback-migration logic
  rcu: Refactor rcu_barrier() empty-list handling
  rcu: Kill rnp->ofl_seq and use only rcu_state.ofl_lock for exclusion
  torture: Change KVM environment variable to RCUTORTURE
  ...
torvalds committed Mar 21, 2022
2 parents a04b1bf + d557819 commit 35dc035
Showing 31 changed files with 400 additions and 242 deletions.
2 changes: 2 additions & 0 deletions Documentation/admin-guide/kernel-parameters.txt
@@ -4504,6 +4504,8 @@
(the least-favored priority). Otherwise, when
RCU_BOOST is not set, valid values are 0-99 and
the default is zero (non-realtime operation).
When RCU_NOCB_CPU is set, also adjust the
priority of NOCB callback kthreads.

rcutree.rcu_nocb_gp_stride= [KNL]
Set the number of NOCB callback kthreads in
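
As an illustration of the amended text (example values, not part of the patch), a command line that offloads callbacks from CPUs 0-3 and raises the RCU kthread priority would now also apply that priority to the rcuo NOCB callback kthreads when CONFIG_RCU_NOCB_CPU=y:

        rcu_nocbs=0-3 rcutree.kthread_prio=10
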
2 changes: 2 additions & 0 deletions MAINTAINERS
@@ -16324,6 +16324,8 @@ F: tools/testing/selftests/resctrl/

READ-COPY UPDATE (RCU)
M: "Paul E. McKenney" <[email protected]>
M: Frederic Weisbecker <[email protected]> (kernel/rcu/tree_nocb.h)
M: Neeraj Upadhyay <[email protected]> (kernel/rcu/tasks.h)
M: Josh Triplett <[email protected]>
R: Steven Rostedt <[email protected]>
R: Mathieu Desnoyers <[email protected]>
4 changes: 2 additions & 2 deletions include/linux/rcupdate.h
@@ -84,7 +84,7 @@ static inline int rcu_preempt_depth(void)

/* Internal to kernel */
void rcu_init(void);
extern int rcu_scheduler_active __read_mostly;
extern int rcu_scheduler_active;
void rcu_sched_clock_irq(int user);
void rcu_report_dead(unsigned int cpu);
void rcutree_migrate_callbacks(int cpu);
@@ -924,7 +924,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
*
* kvfree_rcu(ptr);
*
* where @ptr is a pointer to kvfree().
* where @ptr is the pointer to be freed by kvfree().
*
* Please note, head-less way of freeing is permitted to
* use from a context that has to follow might_sleep()
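
A head-less usage sketch matching the corrected wording (illustrative; struct foo is hypothetical). The single-argument form can fall back to waiting for a grace period when it cannot allocate internal bookkeeping, so it must run in a context where might_sleep() is legal:

        struct foo *fp;

        fp = kvmalloc(sizeof(*fp), GFP_KERNEL);
        if (!fp)
                return;
        /* ... publish fp to readers, later remove all references to it ... */
        kvfree_rcu(fp);         /* Head-less form: @ptr is the pointer to be freed by kvfree(). */
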
2 changes: 1 addition & 1 deletion include/linux/rcutree.h
@@ -62,7 +62,7 @@ static inline void rcu_irq_exit_check_preempt(void) { }
void exit_rcu(void);

void rcu_scheduler_starting(void);
extern int rcu_scheduler_active __read_mostly;
extern int rcu_scheduler_active;
void rcu_end_inkernel_boot(void);
bool rcu_inkernel_boot_has_ended(void);
bool rcu_is_watching(void);
6 changes: 1 addition & 5 deletions include/linux/rcuwait.h
@@ -47,11 +47,7 @@ static inline void prepare_to_rcuwait(struct rcuwait *w)
rcu_assign_pointer(w->task, current);
}

static inline void finish_rcuwait(struct rcuwait *w)
{
rcu_assign_pointer(w->task, NULL);
__set_current_state(TASK_RUNNING);
}
extern void finish_rcuwait(struct rcuwait *w);

#define rcuwait_wait_event(w, condition, state) \
({ \
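
For context on the now out-of-line finish_rcuwait(), an illustrative rcuwait round trip (assumed variable names, not from this patch); rcuwait_wait_event() invokes prepare_to_rcuwait() and finish_rcuwait() internally:

static struct rcuwait my_wait;          /* Initialized elsewhere with rcuwait_init(&my_wait). */
static bool work_done;

        /* Waiter side (may sleep): returns once work_done is observed true. */
        rcuwait_wait_event(&my_wait, READ_ONCE(work_done), TASK_UNINTERRUPTIBLE);

        /* Waker side: publish the condition, then wake the waiter (if any). */
        WRITE_ONCE(work_done, true);
        rcuwait_wake_up(&my_wait);
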
9 changes: 4 additions & 5 deletions include/trace/events/rcu.h
@@ -794,16 +794,15 @@ TRACE_EVENT_RCU(rcu_torture_read,
* Tracepoint for rcu_barrier() execution. The string "s" describes
* the rcu_barrier phase:
* "Begin": rcu_barrier() started.
* "CB": An rcu_barrier_callback() invoked a callback, not the last.
* "EarlyExit": rcu_barrier() piggybacked, thus early exit.
* "Inc1": rcu_barrier() piggyback check counter incremented.
* "OfflineNoCBQ": rcu_barrier() found offline no-CBs CPU with callbacks.
* "OnlineQ": rcu_barrier() found online CPU with callbacks.
* "OnlineNQ": rcu_barrier() found online CPU, no callbacks.
* "Inc2": rcu_barrier() piggyback check counter incremented.
* "IRQ": An rcu_barrier_callback() callback posted on remote CPU.
* "IRQNQ": An rcu_barrier_callback() callback found no callbacks.
* "CB": An rcu_barrier_callback() invoked a callback, not the last.
* "LastCB": An rcu_barrier_callback() invoked the last callback.
* "Inc2": rcu_barrier() piggyback check counter incremented.
* "NQ": rcu_barrier() found a CPU with no callbacks.
* "OnlineQ": rcu_barrier() found online CPU with callbacks.
* The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument
* is the count of remaining callbacks, and "done" is the piggybacking count.
*/
4 changes: 2 additions & 2 deletions kernel/rcu/rcu_segcblist.h
@@ -56,13 +56,13 @@ static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
static inline void rcu_segcblist_set_flags(struct rcu_segcblist *rsclp,
int flags)
{
rsclp->flags |= flags;
WRITE_ONCE(rsclp->flags, rsclp->flags | flags);
}

static inline void rcu_segcblist_clear_flags(struct rcu_segcblist *rsclp,
int flags)
{
rsclp->flags &= ~flags;
WRITE_ONCE(rsclp->flags, rsclp->flags & ~flags);
}

static inline bool rcu_segcblist_test_flags(struct rcu_segcblist *rsclp,
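
The hunk above is an instance of a broader KCSAN-driven convention: when a field is also read locklessly, even lock-protected updates are marked so the tool sees an intentional access and the compiler cannot tear the store. A generic sketch with a hypothetical structure (not kernel code):

struct obj {
        u8 flags;                       /* Updated under a lock, read locklessly elsewhere. */
};

static inline void obj_set_flags(struct obj *op, u8 flags)
{
        WRITE_ONCE(op->flags, op->flags | flags);       /* Marked read-modify-write. */
}

static inline void obj_clear_flags(struct obj *op, u8 flags)
{
        WRITE_ONCE(op->flags, op->flags & ~flags);
}

static inline bool obj_test_flags(struct obj *op, u8 flags)
{
        return READ_ONCE(op->flags) & flags;            /* Marked lockless read. */
}
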
41 changes: 26 additions & 15 deletions kernel/rcu/rcutorture.c
@@ -284,7 +284,7 @@ static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */
static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);

static bool rcu_fwd_cb_nodelay; /* Short rcu_torture_delay() delays. */
static atomic_t rcu_fwd_cb_nodelay; /* Short rcu_torture_delay() delays. */

/*
* Allocate an element from the rcu_tortures pool.
@@ -387,7 +387,7 @@ rcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
* period, and we want a long delay occasionally to trigger
* force_quiescent_state. */

if (!READ_ONCE(rcu_fwd_cb_nodelay) &&
if (!atomic_read(&rcu_fwd_cb_nodelay) &&
!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) {
started = cur_ops->get_gp_seq();
ts = rcu_trace_clock_local();
@@ -674,6 +674,7 @@ static struct rcu_torture_ops srcu_ops = {
.call = srcu_torture_call,
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
.cbflood_max = 50000,
.irq_capable = 1,
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.name = "srcu"
@@ -708,6 +709,7 @@ static struct rcu_torture_ops srcud_ops = {
.call = srcu_torture_call,
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
.cbflood_max = 50000,
.irq_capable = 1,
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.name = "srcud"
@@ -997,7 +999,7 @@ static int rcu_torture_boost(void *arg)
goto checkwait;

/* Wait for the next test interval. */
oldstarttime = boost_starttime;
oldstarttime = READ_ONCE(boost_starttime);
while (time_before(jiffies, oldstarttime)) {
schedule_timeout_interruptible(oldstarttime - jiffies);
if (stutter_wait("rcu_torture_boost"))
@@ -1041,10 +1043,11 @@ static int rcu_torture_boost(void *arg)
* interval. Besides, we are running at RT priority,
* so delays should be relatively rare.
*/
while (oldstarttime == boost_starttime && !kthread_should_stop()) {
while (oldstarttime == READ_ONCE(boost_starttime) && !kthread_should_stop()) {
if (mutex_trylock(&boost_mutex)) {
if (oldstarttime == boost_starttime) {
boost_starttime = jiffies + test_boost_interval * HZ;
WRITE_ONCE(boost_starttime,
jiffies + test_boost_interval * HZ);
n_rcu_torture_boosts++;
}
mutex_unlock(&boost_mutex);
@@ -1276,7 +1279,7 @@ rcu_torture_writer(void *arg)
boot_ended = rcu_inkernel_boot_has_ended();
stutter_waited = stutter_wait("rcu_torture_writer");
if (stutter_waited &&
!READ_ONCE(rcu_fwd_cb_nodelay) &&
!atomic_read(&rcu_fwd_cb_nodelay) &&
!cur_ops->slow_gps &&
!torture_must_stop() &&
boot_ended)
@@ -2180,7 +2183,6 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
for (i = ARRAY_SIZE(rfp->n_launders_hist) - 1; i > 0; i--)
if (rfp->n_launders_hist[i].n_launders > 0)
break;
mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
pr_alert("%s: Callback-invocation histogram %d (duration %lu jiffies):",
__func__, rfp->rcu_fwd_id, jiffies - rfp->rcu_fwd_startat);
gps_old = rfp->rcu_launder_gp_seq_start;
@@ -2193,7 +2195,6 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
gps_old = gps;
}
pr_cont("\n");
mutex_unlock(&rcu_fwd_mutex);
}

/* Callback function for continuous-flood RCU callbacks. */
@@ -2281,6 +2282,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
unsigned long stopat;
static DEFINE_TORTURE_RANDOM(trs);

pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
if (!cur_ops->sync)
return; // Cannot do need_resched() forward progress testing without ->sync.
if (cur_ops->call && cur_ops->cb_barrier) {
@@ -2289,7 +2291,7 @@
}

/* Tight loop containing cond_resched(). */
WRITE_ONCE(rcu_fwd_cb_nodelay, true);
atomic_inc(&rcu_fwd_cb_nodelay);
cur_ops->sync(); /* Later readers see above write. */
if (selfpropcb) {
WRITE_ONCE(fcs.stop, 0);
@@ -2325,6 +2327,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
if (selfpropcb) {
WRITE_ONCE(fcs.stop, 1);
cur_ops->sync(); /* Wait for running CB to complete. */
pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id);
cur_ops->cb_barrier(); /* Wait for queued callbacks. */
}

@@ -2333,7 +2336,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
destroy_rcu_head_on_stack(&fcs.rh);
}
schedule_timeout_uninterruptible(HZ / 10); /* Let kthreads recover. */
WRITE_ONCE(rcu_fwd_cb_nodelay, false);
atomic_dec(&rcu_fwd_cb_nodelay);
}

/* Carry out call_rcu() forward-progress testing. */
@@ -2353,13 +2356,14 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
unsigned long stopat;
unsigned long stoppedat;

pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
if (READ_ONCE(rcu_fwd_emergency_stop))
return; /* Get out of the way quickly, no GP wait! */
if (!cur_ops->call)
return; /* Can't do call_rcu() fwd prog without ->call. */

/* Loop continuously posting RCU callbacks. */
WRITE_ONCE(rcu_fwd_cb_nodelay, true);
atomic_inc(&rcu_fwd_cb_nodelay);
cur_ops->sync(); /* Later readers see above write. */
WRITE_ONCE(rfp->rcu_fwd_startat, jiffies);
stopat = rfp->rcu_fwd_startat + MAX_FWD_CB_JIFFIES;
@@ -2414,6 +2418,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
n_launders_cb_snap = READ_ONCE(rfp->n_launders_cb);
cver = READ_ONCE(rcu_torture_current_version) - cver;
gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps);
pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id);
cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */
(void)rcu_torture_fwd_prog_cbfree(rfp);

@@ -2427,11 +2432,13 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
n_launders, n_launders_sa,
n_max_gps, n_max_cbs, cver, gps);
atomic_long_add(n_max_cbs, &rcu_fwd_max_cbs);
mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
rcu_torture_fwd_cb_hist(rfp);
mutex_unlock(&rcu_fwd_mutex);
}
schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
WRITE_ONCE(rcu_fwd_cb_nodelay, false);
atomic_dec(&rcu_fwd_cb_nodelay);
}


@@ -2511,7 +2518,7 @@ static int rcu_torture_fwd_prog(void *args)
firsttime = false;
WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1);
} else {
while (READ_ONCE(rcu_fwd_seq) == oldseq)
while (READ_ONCE(rcu_fwd_seq) == oldseq && !torture_must_stop())
schedule_timeout_interruptible(1);
oldseq = READ_ONCE(rcu_fwd_seq);
}
@@ -2905,8 +2912,10 @@ rcu_torture_cleanup(void)
int i;

if (torture_cleanup_begin()) {
if (cur_ops->cb_barrier != NULL)
if (cur_ops->cb_barrier != NULL) {
pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
cur_ops->cb_barrier();
}
return;
}
if (!cur_ops) {
@@ -2961,8 +2970,10 @@ rcu_torture_cleanup(void)
* Wait for all RCU callbacks to fire, then do torture-type-specific
* cleanup operations.
*/
if (cur_ops->cb_barrier != NULL)
if (cur_ops->cb_barrier != NULL) {
pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
cur_ops->cb_barrier();
}
if (cur_ops->cleanup != NULL)
cur_ops->cleanup();

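Several of the rcutorture hunks above convert rcu_fwd_cb_nodelay from a bool into a counter. A generic sketch of the idea (assumed helper names, not the kernel's exact code): with multiple forward-progress kthreads running concurrently, a plain bool would be cleared by the first instance to finish, whereas a counter keeps short delays in force until the last instance exits.

static atomic_t nodelay;        /* Nonzero while any forward-progress test is running. */

static void fwd_prog_enter(void)
{
        atomic_inc(&nodelay);                   /* One more instance wants short delays. */
}

static void fwd_prog_exit(void)
{
        atomic_dec(&nodelay);                   /* Delays resume once this reaches zero. */
}

static bool want_short_delays(void)
{
        return atomic_read(&nodelay) != 0;
}
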
6 changes: 3 additions & 3 deletions kernel/rcu/tasks.h
@@ -123,7 +123,7 @@ static struct rcu_tasks rt_name = \
.call_func = call, \
.rtpcpu = &rt_name ## __percpu, \
.name = n, \
.percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS) + 1, \
.percpu_enqueue_shift = order_base_2(CONFIG_NR_CPUS), \
.percpu_enqueue_lim = 1, \
.percpu_dequeue_lim = 1, \
.barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \
@@ -302,7 +302,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
if (unlikely(needadjust)) {
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rtp->percpu_enqueue_lim != nr_cpu_ids) {
WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1);
WRITE_ONCE(rtp->percpu_enqueue_shift, 0);
WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids);
smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids);
pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
@@ -417,7 +417,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) {
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rtp->percpu_enqueue_lim > 1) {
WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1);
WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(nr_cpu_ids));
smp_store_release(&rtp->percpu_enqueue_lim, 1);
rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);
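
The tasks.h hunks adjust how ->percpu_enqueue_shift maps a CPU to a callback queue; roughly, the enqueue path picks queue "cpu >> shift". The sketch below (illustrative arithmetic only, not the kernel's exact enqueue code) shows why a shift of 0 gives per-CPU queueing upon contention, why order_base_2(nr_cpu_ids) collapses everything onto queue 0, and how that differs from the old ilog2() + 1 on power-of-two systems:

/* Queue selection as used conceptually by the enqueue path. */
static inline unsigned int queue_of(unsigned int cpu, unsigned int shift)
{
        return cpu >> shift;
}

/*
 * With nr_cpu_ids == 8:
 *   order_base_2(8) == 3, so queue_of(cpu, 3) == 0 for cpu 0..7 (single queue).
 *   ilog2(8) + 1    == 4, one bit more than needed on power-of-two systems.
 * Upon contention, the shift is now set to 0, so queue_of(cpu, 0) == cpu
 * and every CPU enqueues onto its own queue.
 */
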
(remaining changed files not shown)
