Merge branches 'doc.2021.07.20c', 'fixes.2021.08.06a', 'nocb.2021.07.20c', 'nolibc.2021.07.20c', 'tasks.2021.07.20c', 'torture.2021.07.27a' and 'torturescript.2021.07.27a' into HEAD

doc.2021.07.20c: Documentation updates.
fixes.2021.08.06a: Miscellaneous fixes.
nocb.2021.07.20c: Callback-offloading (NOCB CPU) updates.
nolibc.2021.07.20c: Tiny userspace library updates.
tasks.2021.07.20c: Tasks RCU updates.
torture.2021.07.27a: In-kernel torture-test updates.
torturescript.2021.07.27a: Torture-test scripting updates.
paulmckrcu committed Aug 10, 2021
7 parents 99c0974 + d3dd95a + cba712b + f916d77 + fed31a4 + ed4fa24 + 06ca914 commit b770efc
Showing 36 changed files with 2,172 additions and 1,744 deletions.
35 changes: 17 additions & 18 deletions include/linux/rculist.h
@@ -10,15 +10,6 @@
#include <linux/list.h>
#include <linux/rcupdate.h>

/*
* Why is there no list_empty_rcu()? Because list_empty() serves this
* purpose. The list_empty() function fetches the RCU-protected pointer
* and compares it to the address of the list head, but neither dereferences
* this pointer itself nor provides this pointer to the caller. Therefore,
* it is not necessary to use rcu_dereference(), so that list_empty() can
* be used anywhere you would want to use a list_empty_rcu().
*/

/*
* INIT_LIST_HEAD_RCU - Initialize a list_head visible to RCU readers
* @list: list to be initialized
@@ -318,21 +309,29 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
/*
* Where are list_empty_rcu() and list_first_entry_rcu()?
*
* Implementing those functions following their counterparts list_empty() and
* list_first_entry() is not advisable because they lead to subtle race
* conditions as the following snippet shows:
* They do not exist because they would lead to subtle race conditions:
*
* if (!list_empty_rcu(mylist)) {
* struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member);
* do_something(bar);
* }
*
* The list may not be empty when list_empty_rcu checks it, but it may be when
* list_first_entry_rcu rereads the ->next pointer.
*
* Rereading the ->next pointer is not a problem for list_empty() and
* list_first_entry() because they would be protected by a lock that blocks
* writers.
* The list might be non-empty when list_empty_rcu() checks it, but it
* might have become empty by the time that list_first_entry_rcu() rereads
* the ->next pointer, which would result in a SEGV.
*
* When not using RCU, it is OK for list_first_entry() to re-read that
* pointer because both functions should be protected by some lock that
* blocks writers.
*
* When using RCU, list_empty() uses READ_ONCE() to fetch the
* RCU-protected ->next pointer and then compares it to the address of the
* list head. However, it neither dereferences this pointer nor provides
* this pointer to its caller. Thus, READ_ONCE() suffices (that is,
* rcu_dereference() is not needed), which means that list_empty() can be
* used anywhere you would want to use list_empty_rcu(). Just don't
* expect anything useful to happen if you do a subsequent lockless
* call to list_first_entry_rcu()!!!
*
* See list_first_or_null_rcu for an alternative.
*/
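As a minimal sketch (not part of this commit), the race-free pattern recommended by the new comment uses list_first_or_null_rcu(); struct foo, its list_member field, mylist, and do_something() are the hypothetical names from the comment's own example:

	struct foo *bar;

	rcu_read_lock();
	bar = list_first_or_null_rcu(&mylist, struct foo, list_member);
	if (bar)
		do_something(bar);	/* still inside the RCU read-side critical section */
	rcu_read_unlock();

Because list_first_or_null_rcu() fetches the ->next pointer only once, there is no window between the emptiness check and the dereference in which the list can become empty.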
4 changes: 2 additions & 2 deletions include/linux/rcupdate.h
@@ -53,7 +53,7 @@ void __rcu_read_unlock(void);
* nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other
* types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
*/
#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
#define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting)

#else /* #ifdef CONFIG_PREEMPT_RCU */

@@ -167,7 +167,7 @@ void synchronize_rcu_tasks(void);
# define synchronize_rcu_tasks synchronize_rcu
# endif

# ifdef CONFIG_TASKS_RCU_TRACE
# ifdef CONFIG_TASKS_TRACE_RCU
# define rcu_tasks_trace_qs(t) \
do { \
if (!likely(READ_ONCE((t)->trc_reader_checked)) && \
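The added READ_ONCE() marks the read of ->rcu_read_lock_nesting for data-race checking tools without otherwise changing behavior. A minimal sketch of a caller, assuming a hypothetical debug helper (not from this commit):

	static void assert_in_preemptible_rcu_reader(void)
	{
		/* Nesting depth is tracked only in CONFIG_PREEMPT_RCU builds. */
		if (IS_ENABLED(CONFIG_PREEMPT_RCU))
			WARN_ON_ONCE(!rcu_preempt_depth());
	}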
3 changes: 0 additions & 3 deletions include/linux/rcutiny.h
@@ -14,9 +14,6 @@

#include <asm/param.h> /* for HZ */

/* Never flag non-existent other CPUs! */
static inline bool rcu_eqs_special_set(int cpu) { return false; }

unsigned long get_state_synchronize_rcu(void);
unsigned long start_poll_synchronize_rcu(void);
bool poll_state_synchronize_rcu(unsigned long oldstate);
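For context, the declarations that remain here are Tiny RCU's polled grace-period API. A minimal usage sketch (not from this commit; old_ptr is a hypothetical object being reclaimed):

	unsigned long cookie;

	cookie = start_poll_synchronize_rcu();	/* start a grace period */
	/* ... do other work while the grace period elapses ... */
	if (!poll_state_synchronize_rcu(cookie))
		cond_synchronize_rcu(cookie);	/* block only if still needed */
	kfree(old_ptr);				/* safe: a full grace period has passed */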
8 changes: 4 additions & 4 deletions include/linux/srcutiny.h
@@ -61,7 +61,7 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp)
int idx;

idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1;
WRITE_ONCE(ssp->srcu_lock_nesting[idx], ssp->srcu_lock_nesting[idx] + 1);
WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1);
return idx;
}

@@ -81,11 +81,11 @@ static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
{
int idx;

idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1;
idx = ((data_race(READ_ONCE(ssp->srcu_idx)) + 1) & 0x2) >> 1;
pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n",
tt, tf, idx,
READ_ONCE(ssp->srcu_lock_nesting[!idx]),
READ_ONCE(ssp->srcu_lock_nesting[idx]));
data_race(READ_ONCE(ssp->srcu_lock_nesting[!idx])),
data_race(READ_ONCE(ssp->srcu_lock_nesting[idx])));
}

#endif
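The srcu_lock_nesting[] counters updated above back the usual SRCU read-side pattern; a minimal reader-side sketch (my_srcu, gp, struct foo, and do_something() are hypothetical):

	int idx;
	struct foo *p;

	idx = srcu_read_lock(&my_srcu);		/* increments srcu_lock_nesting[idx] */
	p = srcu_dereference(gp, &my_srcu);
	if (p)
		do_something(p);
	srcu_read_unlock(&my_srcu, idx);	/* decrements the same counter */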
25 changes: 13 additions & 12 deletions kernel/locking/locktorture.c
@@ -59,7 +59,7 @@ static struct task_struct **writer_tasks;
static struct task_struct **reader_tasks;

static bool lock_is_write_held;
static bool lock_is_read_held;
static atomic_t lock_is_read_held;
static unsigned long last_lock_release;

struct lock_stress_stats {
@@ -682,7 +682,7 @@ static int lock_torture_writer(void *arg)
if (WARN_ON_ONCE(lock_is_write_held))
lwsp->n_lock_fail++;
lock_is_write_held = true;
if (WARN_ON_ONCE(lock_is_read_held))
if (WARN_ON_ONCE(atomic_read(&lock_is_read_held)))
lwsp->n_lock_fail++; /* rare, but... */

lwsp->n_lock_acquired++;
@@ -717,13 +717,13 @@ static int lock_torture_reader(void *arg)
schedule_timeout_uninterruptible(1);

cxt.cur_ops->readlock(tid);
lock_is_read_held = true;
atomic_inc(&lock_is_read_held);
if (WARN_ON_ONCE(lock_is_write_held))
lrsp->n_lock_fail++; /* rare, but... */

lrsp->n_lock_acquired++;
cxt.cur_ops->read_delay(&rand);
lock_is_read_held = false;
atomic_dec(&lock_is_read_held);
cxt.cur_ops->readunlock(tid);

stutter_wait("lock_torture_reader");
@@ -738,20 +738,22 @@ static int lock_torture_reader(void *arg)
static void __torture_print_stats(char *page,
struct lock_stress_stats *statp, bool write)
{
long cur;
bool fail = false;
int i, n_stress;
long max = 0, min = statp ? statp[0].n_lock_acquired : 0;
long max = 0, min = statp ? data_race(statp[0].n_lock_acquired) : 0;
long long sum = 0;

n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress;
for (i = 0; i < n_stress; i++) {
if (statp[i].n_lock_fail)
if (data_race(statp[i].n_lock_fail))
fail = true;
sum += statp[i].n_lock_acquired;
if (max < statp[i].n_lock_acquired)
max = statp[i].n_lock_acquired;
if (min > statp[i].n_lock_acquired)
min = statp[i].n_lock_acquired;
cur = data_race(statp[i].n_lock_acquired);
sum += cur;
if (max < cur)
max = cur;
if (min > cur)
min = cur;
}
page += sprintf(page,
"%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n",
@@ -996,7 +998,6 @@ static int __init lock_torture_init(void)
}

if (nreaders_stress) {
lock_is_read_held = false;
cxt.lrsa = kmalloc_array(cxt.nrealreaders_stress,
sizeof(*cxt.lrsa),
GFP_KERNEL);
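The bool-to-atomic_t conversion matters because several readers can overlap: with a plain bool, the first reader to exit clears the flag while another reader still holds the lock, so the writer's WARN_ON_ONCE() can miss a real conflict. A minimal sketch of the counting pattern adopted above (the helper name and counter are hypothetical; cxt, lock_is_write_held, and the ops hooks are locktorture's own):

	static atomic_t nreaders_holding = ATOMIC_INIT(0);

	static void demo_read_side(int tid)
	{
		cxt.cur_ops->readlock(tid);
		atomic_inc(&nreaders_holding);		/* this reader is now counted */
		WARN_ON_ONCE(lock_is_write_held);	/* overlap with a writer? */
		atomic_dec(&nreaders_holding);		/* counted out before unlock */
		cxt.cur_ops->readunlock(tid);
	}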
4 changes: 2 additions & 2 deletions kernel/rcu/rcuscale.c
@@ -487,7 +487,7 @@ rcu_scale_writer(void *arg)
if (gp_async) {
cur_ops->gp_barrier();
}
writer_n_durations[me] = i_max;
writer_n_durations[me] = i_max + 1;
torture_kthread_stopping("rcu_scale_writer");
return 0;
}
@@ -561,7 +561,7 @@
wdpp = writer_durations[i];
if (!wdpp)
continue;
for (j = 0; j <= writer_n_durations[i]; j++) {
for (j = 0; j < writer_n_durations[i]; j++) {
wdp = &wdpp[j];
pr_alert("%s%s %4d writer-duration: %5d %llu\n",
scale_type, SCALE_FLAG,
7 changes: 6 additions & 1 deletion kernel/rcu/rcutorture.c
@@ -2022,8 +2022,13 @@ static int rcu_torture_stall(void *args)
__func__, raw_smp_processor_id());
while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(),
stop_at))
if (stall_cpu_block)
if (stall_cpu_block) {
#ifdef CONFIG_PREEMPTION
preempt_schedule();
#else
schedule_timeout_uninterruptible(HZ);
#endif
}
if (stall_cpu_irqsoff)
local_irq_enable();
else if (!stall_cpu_block)
36 changes: 35 additions & 1 deletion kernel/rcu/refscale.c
@@ -467,6 +467,40 @@ static struct ref_scale_ops acqrel_ops = {
.name = "acqrel"
};

static volatile u64 stopopts;

static void ref_clock_section(const int nloops)
{
u64 x = 0;
int i;

preempt_disable();
for (i = nloops; i >= 0; i--)
x += ktime_get_real_fast_ns();
preempt_enable();
stopopts = x;
}

static void ref_clock_delay_section(const int nloops, const int udl, const int ndl)
{
u64 x = 0;
int i;

preempt_disable();
for (i = nloops; i >= 0; i--) {
x += ktime_get_real_fast_ns();
un_delay(udl, ndl);
}
preempt_enable();
stopopts = x;
}

static struct ref_scale_ops clock_ops = {
.readsection = ref_clock_section,
.delaysection = ref_clock_delay_section,
.name = "clock"
};

static void rcu_scale_one_reader(void)
{
if (readdelay <= 0)
@@ -759,7 +793,7 @@
int firsterr = 0;
static struct ref_scale_ops *scale_ops[] = {
&rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, &refcnt_ops, &rwlock_ops,
&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops,
&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
};

if (!torture_init_begin(scale_type, verbose))
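The new clock_ops entry is driven through the same readsection/delaysection hooks as the existing ops; a minimal sketch of the dispatch, based on the rcu_scale_one_reader() shown truncated above (udl and ndl stand in for the microsecond/nanosecond delay arguments):

	if (readdelay <= 0)
		cur_ops->readsection(loops);		/* e.g., ref_clock_section() */
	else
		cur_ops->delaysection(loops, udl, ndl);	/* e.g., ref_clock_delay_section() */

Selecting the new ops would presumably use refscale's existing scale_type module parameter (for example, refscale.scale_type=clock), matching the scale_type string checked in ref_scale_init().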
2 changes: 1 addition & 1 deletion kernel/rcu/srcutiny.c
@@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
*/
void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
{
int newval = ssp->srcu_lock_nesting[idx] - 1;
int newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1;

WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval);
if (!newval && READ_ONCE(ssp->srcu_gp_waiting))
36 changes: 23 additions & 13 deletions kernel/rcu/tasks.h
@@ -643,8 +643,8 @@ void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
//
// "Rude" variant of Tasks RCU, inspired by Steve Rostedt's trick of
// passing an empty function to schedule_on_each_cpu(). This approach
// provides an asynchronous call_rcu_tasks_rude() API and batching
// of concurrent calls to the synchronous synchronize_rcu_rude() API.
// provides an asynchronous call_rcu_tasks_rude() API and batching of
// concurrent calls to the synchronous synchronize_rcu_tasks_rude() API.
// This invokes schedule_on_each_cpu() in order to send IPIs far and wide
// and induces otherwise unnecessary context switches on all online CPUs,
// whether idle or not.
@@ -785,7 +785,10 @@ EXPORT_SYMBOL_GPL(show_rcu_tasks_rude_gp_kthread);
// set that task's .need_qs flag so that task's next outermost
// rcu_read_unlock_trace() will report the quiescent state (in which
// case the count of readers is incremented). If both attempts fail,
// the task is added to a "holdout" list.
// the task is added to a "holdout" list. Note that IPIs are used
// to invoke trc_read_check_handler() in the context of running tasks
// in order to avoid ordering overhead on common-case shared-variable
// accesses.
// rcu_tasks_trace_postscan():
// Initialize state and attempt to identify an immediate quiescent
// state as above (but only for idle tasks), unblock CPU-hotplug
@@ -847,7 +850,7 @@ static DEFINE_IRQ_WORK(rcu_tasks_trace_iw, rcu_read_unlock_iw);
/* If we are the last reader, wake up the grace-period kthread. */
void rcu_read_unlock_trace_special(struct task_struct *t, int nesting)
{
int nq = t->trc_reader_special.b.need_qs;
int nq = READ_ONCE(t->trc_reader_special.b.need_qs);

if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
t->trc_reader_special.b.need_mb)
@@ -894,7 +897,7 @@ static void trc_read_check_handler(void *t_in)

// If the task is not in a read-side critical section, and
// if this is the last reader, awaken the grace-period kthread.
if (likely(!t->trc_reader_nesting)) {
if (likely(!READ_ONCE(t->trc_reader_nesting))) {
if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
wake_up(&trc_wait);
// Mark as checked after decrement to avoid false
@@ -903,7 +906,7 @@ static void trc_read_check_handler(void *t_in)
goto reset_ipi;
}
// If we are racing with an rcu_read_unlock_trace(), try again later.
if (unlikely(t->trc_reader_nesting < 0)) {
if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) {
if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
wake_up(&trc_wait);
goto reset_ipi;
@@ -913,14 +916,14 @@
// Get here if the task is in a read-side critical section. Set
// its state so that it will awaken the grace-period kthread upon
// exit from that critical section.
WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs));
WRITE_ONCE(t->trc_reader_special.b.need_qs, true);

reset_ipi:
// Allow future IPIs to be sent on CPU and for task.
// Also order this IPI handler against any later manipulations of
// the intended task.
smp_store_release(&per_cpu(trc_ipi_to_cpu, smp_processor_id()), false); // ^^^
smp_store_release(per_cpu_ptr(&trc_ipi_to_cpu, smp_processor_id()), false); // ^^^
smp_store_release(&texp->trc_ipi_to_cpu, -1); // ^^^
}

@@ -950,6 +953,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg)
n_heavy_reader_ofl_updates++;
in_qs = true;
} else {
// The task is not running, so C-language access is safe.
in_qs = likely(!t->trc_reader_nesting);
}

@@ -964,7 +968,7 @@
// state so that it will awaken the grace-period kthread upon exit
// from that critical section.
atomic_inc(&trc_n_readers_need_end); // One more to wait on.
WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs));
WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
return true;
}
@@ -982,7 +986,7 @@ static void trc_wait_for_one_reader(struct task_struct *t,
// The current task had better be in a quiescent state.
if (t == current) {
t->trc_reader_checked = true;
WARN_ON_ONCE(t->trc_reader_nesting);
WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting));
return;
}

@@ -994,6 +998,12 @@
}
put_task_struct(t);

// If this task is not yet on the holdout list, then we are in
// an RCU read-side critical section. Otherwise, the invocation of
// rcu_add_holdout() that added it to the list did the necessary
// get_task_struct(). Either way, the task cannot be freed out
// from under this code.

// If currently running, send an IPI, either way, add to list.
trc_add_holdout(t, bhp);
if (task_curr(t) &&
@@ -1092,8 +1102,8 @@ static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
".I"[READ_ONCE(t->trc_ipi_to_cpu) > 0],
".i"[is_idle_task(t)],
".N"[cpu > 0 && tick_nohz_full_cpu(cpu)],
t->trc_reader_nesting,
" N"[!!t->trc_reader_special.b.need_qs],
READ_ONCE(t->trc_reader_nesting),
" N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)],
cpu);
sched_show_task(t);
}
@@ -1187,7 +1197,7 @@ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
static void exit_tasks_rcu_finish_trace(struct task_struct *t)
{
WRITE_ONCE(t->trc_reader_checked, true);
WARN_ON_ONCE(t->trc_reader_nesting);
WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting));
WRITE_ONCE(t->trc_reader_nesting, 0);
if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)))
rcu_read_unlock_trace_special(t, 0);
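For context, the ->trc_reader_nesting and ->trc_reader_special.b.need_qs fields wrapped in READ_ONCE() above are maintained by the Tasks Trace RCU read-side primitives declared in <linux/rcupdate_trace.h>. A minimal reader sketch (not from this patch; gp, struct foo, and do_something() are hypothetical):

	struct foo *p;

	rcu_read_lock_trace();		/* increments ->trc_reader_nesting */
	p = rcu_dereference_check(gp, rcu_read_lock_trace_held());
	if (p)
		do_something(p);
	rcu_read_unlock_trace();	/* outermost exit may report a quiescent state */

A task that leaves its outermost critical section with need_qs set reports the quiescent state via rcu_read_unlock_trace_special(), which is what the IPI handler above arranges.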