rcu/context-tracking: Move RCU-dynticks internal functions to context_tracking

Move the core RCU eqs/dynticks functions to context tracking so that
we can later merge all that code within context tracking.

Acked-by: Paul E. McKenney <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Neeraj Upadhyay <[email protected]>
Cc: Uladzislau Rezki <[email protected]>
Cc: Joel Fernandes <[email protected]>
Cc: Boqun Feng <[email protected]>
Cc: Nicolas Saenz Julienne <[email protected]>
Cc: Marcelo Tosatti <[email protected]>
Cc: Xiongfeng Wang <[email protected]>
Cc: Yu Liao <[email protected]>
Cc: Phil Auld <[email protected]>
Cc: Paul Gortmaker <[email protected]>
Cc: Alex Belits <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
Reviewed-by: Nicolas Saenz Julienne <[email protected]>
Tested-by: Nicolas Saenz Julienne <[email protected]>
Frederic Weisbecker authored and paulmckrcu committed Jul 5, 2022
1 parent 5645064 commit 1721145
Showing 6 changed files with 364 additions and 362 deletions.
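
Background for the code moved below (an illustrative sketch only, not part of this commit): the per-CPU ->dynticks counter encodes RCU's view of the CPU through its parity. An even value means the CPU is in an extended quiescent state (RCU is not watching); an odd value means RCU is watching. A minimal stand-alone model of that convention, using C11 atomics and hypothetical model_* names in place of the kernel's per-CPU arch_atomic helpers:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int dynticks;     /* stand-in for one CPU's ->dynticks counter */

/* Even counter value: extended quiescent state, RCU is not watching this CPU. */
static bool model_curr_cpu_in_eqs(void)
{
        return !(atomic_load(&dynticks) & 0x1);
}

/* Add to the counter with full ordering and return the new value, as
 * rcu_dynticks_inc() does via arch_atomic_add_return(). */
static int model_dynticks_inc(int incby)
{
        return atomic_fetch_add(&dynticks, incby) + incby;
}

The real rcu_dynticks_curr_cpu_in_eqs() and rcu_dynticks_inc() in the diff below apply the same logic to this_cpu_ptr(&context_tracking.dynticks).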
20 changes: 20 additions & 0 deletions include/linux/context_tracking.h
@@ -122,6 +122,26 @@ static inline void context_tracking_init(void) { }
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
extern void ct_idle_enter(void);
extern void ct_idle_exit(void);

/*
 * Is the current CPU in an extended quiescent state?
 *
 * No ordering, as we are sampling CPU-local information.
 */
static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
{
        return !(arch_atomic_read(this_cpu_ptr(&context_tracking.dynticks)) & 0x1);
}

/*
 * Increment the current CPU's context_tracking structure's ->dynticks field
 * with ordering. Return the new value.
 */
static __always_inline unsigned long rcu_dynticks_inc(int incby)
{
        return arch_atomic_add_return(incby, this_cpu_ptr(&context_tracking.dynticks));
}

#else
static inline void ct_idle_enter(void) { }
static inline void ct_idle_exit(void) { }
3 changes: 3 additions & 0 deletions include/linux/rcutree.h
@@ -55,6 +55,9 @@ void rcu_irq_exit_check_preempt(void);
static inline void rcu_irq_exit_check_preempt(void) { }
#endif

struct task_struct;
void rcu_preempt_deferred_qs(struct task_struct *t);

void exit_rcu(void);

void rcu_scheduler_starting(void);
336 changes: 336 additions & 0 deletions kernel/context_tracking.c
@@ -21,6 +21,7 @@
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/kprobes.h>
#include <trace/events/rcu.h>


DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
@@ -33,6 +34,309 @@ DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
EXPORT_SYMBOL_GPL(context_tracking);

#ifdef CONFIG_CONTEXT_TRACKING_IDLE
#define TPS(x) tracepoint_string(x)

/* Record the current task on dyntick-idle entry. */
static __always_inline void rcu_dynticks_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
        WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}

/* Record no current task on dyntick-idle exit. */
static __always_inline void rcu_dynticks_task_exit(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
        WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}

/* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
static __always_inline void rcu_dynticks_task_trace_enter(void)
{
#ifdef CONFIG_TASKS_TRACE_RCU
        if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
                current->trc_reader_special.b.need_mb = true;
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}

/* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
static __always_inline void rcu_dynticks_task_trace_exit(void)
{
#ifdef CONFIG_TASKS_TRACE_RCU
        if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
                current->trc_reader_special.b.need_mb = false;
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}

/*
 * Record entry into an extended quiescent state. This is only to be
 * called when not already in an extended quiescent state, that is,
 * RCU is watching prior to the call to this function and is no longer
 * watching upon return.
 */
static noinstr void rcu_dynticks_eqs_enter(void)
{
        int seq;

        /*
         * CPUs seeing atomic_add_return() must see prior RCU read-side
         * critical sections, and we also must force ordering with the
         * next idle sojourn.
         */
        rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
        seq = rcu_dynticks_inc(1);
        // RCU is no longer watching. Better be in extended quiescent state!
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1));
}

/*
 * Record exit from an extended quiescent state. This is only to be
 * called from an extended quiescent state, that is, RCU is not watching
 * prior to the call to this function and is watching upon return.
 */
static noinstr void rcu_dynticks_eqs_exit(void)
{
        int seq;

        /*
         * CPUs seeing atomic_add_return() must see prior idle sojourns,
         * and we also must force ordering with the next RCU read-side
         * critical section.
         */
        seq = rcu_dynticks_inc(1);
        // RCU is now watching. Better not be in an extended quiescent state!
        rcu_dynticks_task_trace_exit(); // After ->dynticks update!
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1));
}

/*
 * Enter an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 *
 * We crowbar the ->dynticks_nmi_nesting field to zero to allow for
 * the possibility of usermode upcalls having messed up our count
 * of interrupt nesting level during the prior busy period.
 */
static void noinstr rcu_eqs_enter(bool user)
{
        struct context_tracking *ct = this_cpu_ptr(&context_tracking);

        WARN_ON_ONCE(ct_dynticks_nmi_nesting() != DYNTICK_IRQ_NONIDLE);
        WRITE_ONCE(ct->dynticks_nmi_nesting, 0);
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
                     ct_dynticks_nesting() == 0);
        if (ct_dynticks_nesting() != 1) {
                // RCU will still be watching, so just do accounting and leave.
                ct->dynticks_nesting--;
                return;
        }

        instrumentation_begin();
        lockdep_assert_irqs_disabled();
        trace_rcu_dyntick(TPS("Start"), ct_dynticks_nesting(), 0, ct_dynticks());
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
        rcu_preempt_deferred_qs(current);

        // instrumentation for the noinstr rcu_dynticks_eqs_enter()
        instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));

        instrumentation_end();
        WRITE_ONCE(ct->dynticks_nesting, 0); /* Avoid irq-access tearing. */
        // RCU is watching here ...
        rcu_dynticks_eqs_enter();
        // ... but is no longer watching here.
        rcu_dynticks_task_enter();
}

/*
 * Exit an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 *
 * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to
 * allow for the possibility of usermode upcalls messing up our count of
 * interrupt nesting level during the busy period that is just now starting.
 */
static void noinstr rcu_eqs_exit(bool user)
{
        struct context_tracking *ct = this_cpu_ptr(&context_tracking);
        long oldval;

        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
        oldval = ct_dynticks_nesting();
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
        if (oldval) {
                // RCU was already watching, so just do accounting and leave.
                ct->dynticks_nesting++;
                return;
        }
        rcu_dynticks_task_exit();
        // RCU is not watching here ...
        rcu_dynticks_eqs_exit();
        // ... but is watching here.
        instrumentation_begin();

        // instrumentation for the noinstr rcu_dynticks_eqs_exit()
        instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));

        trace_rcu_dyntick(TPS("End"), ct_dynticks_nesting(), 1, ct_dynticks());
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
        WRITE_ONCE(ct->dynticks_nesting, 1);
        WARN_ON_ONCE(ct_dynticks_nmi_nesting());
        WRITE_ONCE(ct->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
        instrumentation_end();
}

/**
 * rcu_nmi_exit - inform RCU of exit from NMI context
 *
 * If we are returning from the outermost NMI handler that interrupted an
 * RCU-idle period, update ct->dynticks and ct->dynticks_nmi_nesting
 * to let the RCU grace-period handling know that the CPU is back to
 * being RCU-idle.
 *
 * If you add or remove a call to rcu_nmi_exit(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr rcu_nmi_exit(void)
{
        struct context_tracking *ct = this_cpu_ptr(&context_tracking);

        instrumentation_begin();
        /*
         * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
         * (We are exiting an NMI handler, so RCU better be paying attention
         * to us!)
         */
        WARN_ON_ONCE(ct_dynticks_nmi_nesting() <= 0);
        WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());

        /*
         * If the nesting level is not 1, the CPU wasn't RCU-idle, so
         * leave it in non-RCU-idle state.
         */
        if (ct_dynticks_nmi_nesting() != 1) {
                trace_rcu_dyntick(TPS("--="), ct_dynticks_nmi_nesting(), ct_dynticks_nmi_nesting() - 2,
                                  ct_dynticks());
                WRITE_ONCE(ct->dynticks_nmi_nesting, /* No store tearing. */
                           ct_dynticks_nmi_nesting() - 2);
                instrumentation_end();
                return;
        }

        /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
        trace_rcu_dyntick(TPS("Startirq"), ct_dynticks_nmi_nesting(), 0, ct_dynticks());
        WRITE_ONCE(ct->dynticks_nmi_nesting, 0); /* Avoid store tearing. */

        // instrumentation for the noinstr rcu_dynticks_eqs_enter()
        instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
        instrumentation_end();

        // RCU is watching here ...
        rcu_dynticks_eqs_enter();
        // ... but is no longer watching here.

        if (!in_nmi())
                rcu_dynticks_task_enter();
}

/**
 * rcu_nmi_enter - inform RCU of entry to NMI context
 *
 * If the CPU was idle from RCU's viewpoint, update ct->dynticks and
 * ct->dynticks_nmi_nesting to let the RCU grace-period handling know
 * that the CPU is active. This implementation permits nested NMIs, as
 * long as the nesting level does not overflow an int. (You will probably
 * run out of stack space first.)
 *
 * If you add or remove a call to rcu_nmi_enter(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr rcu_nmi_enter(void)
{
        long incby = 2;
        struct context_tracking *ct = this_cpu_ptr(&context_tracking);

        /* Complain about underflow. */
        WARN_ON_ONCE(ct_dynticks_nmi_nesting() < 0);

        /*
         * If idle from RCU viewpoint, atomically increment ->dynticks
         * to mark non-idle and increment ->dynticks_nmi_nesting by one.
         * Otherwise, increment ->dynticks_nmi_nesting by two. This means
         * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
         * to be in the outermost NMI handler that interrupted an RCU-idle
         * period (observation due to Andy Lutomirski).
         */
        if (rcu_dynticks_curr_cpu_in_eqs()) {

                if (!in_nmi())
                        rcu_dynticks_task_exit();

                // RCU is not watching here ...
                rcu_dynticks_eqs_exit();
                // ... but is watching here.

                instrumentation_begin();
                // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
                instrument_atomic_read(&ct->dynticks, sizeof(ct->dynticks));
                // instrumentation for the noinstr rcu_dynticks_eqs_exit()
                instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));

                incby = 1;
        } else if (!in_nmi()) {
                instrumentation_begin();
                rcu_irq_enter_check_tick();
        } else {
                instrumentation_begin();
        }

        trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
                          ct_dynticks_nmi_nesting(),
                          ct_dynticks_nmi_nesting() + incby, ct_dynticks());
        instrumentation_end();
        WRITE_ONCE(ct->dynticks_nmi_nesting, /* Prevent store tearing. */
                   ct_dynticks_nmi_nesting() + incby);
        barrier();
}
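
For illustration only (not part of the diff), here is a minimal stand-alone model of the nesting rule described in the comment above: an entry that finds the CPU in an extended quiescent state adds 1, so a ->dynticks_nmi_nesting value of exactly 1 marks the outermost NMI that interrupted an RCU-idle period, while every other entry adds 2, and rcu_nmi_exit() undoes the matching amount. The model_* names are hypothetical.

/* Hypothetical single-CPU model of the ->dynticks_nmi_nesting accounting. */
static long model_nmi_nesting;

static void model_nmi_enter(bool cpu_was_in_eqs)
{
        /* Outermost NMI from an RCU-idle CPU adds 1; any other entry adds 2. */
        model_nmi_nesting += cpu_was_in_eqs ? 1 : 2;
}

static void model_nmi_exit(void)
{
        if (model_nmi_nesting == 1)
                model_nmi_nesting = 0;  /* outermost handler: back to RCU-idle */
        else
                model_nmi_nesting -= 2; /* CPU stays non-idle after this return */
}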

/**
 * rcu_idle_enter - inform RCU that current CPU is entering idle
 *
 * Enter idle mode, in other words, -leave- the mode in which RCU
 * read-side critical sections can occur. (Though RCU read-side
 * critical sections can occur in irq handlers in idle, a possibility
 * handled by irq_enter() and irq_exit().)
 *
 * If you add or remove a call to rcu_idle_enter(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr rcu_idle_enter(void)
{
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
        rcu_eqs_enter(false);
}

/**
 * rcu_idle_exit - inform RCU that current CPU is leaving idle
 *
 * Exit idle mode, in other words, -enter- the mode in which RCU
 * read-side critical sections can occur.
 *
 * If you add or remove a call to rcu_idle_exit(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr rcu_idle_exit(void)
{
        unsigned long flags;

        raw_local_irq_save(flags);
        rcu_eqs_exit(false);
        raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_exit);

noinstr void ct_idle_enter(void)
{
        rcu_idle_enter();
@@ -139,6 +443,38 @@ noinstr void ct_nmi_exit(void)
}
#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */

#ifdef CONFIG_NO_HZ_FULL
/**
 * rcu_user_enter - inform RCU that we are resuming userspace.
 *
 * Enter RCU idle mode right before resuming userspace. No use of RCU
 * is permitted between this call and rcu_user_exit(). This way the
 * CPU doesn't need to maintain the tick for RCU maintenance purposes
 * when the CPU runs in userspace.
 *
 * If you add or remove a call to rcu_user_enter(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
noinstr void rcu_user_enter(void)
{
        rcu_eqs_enter(true);
}

/**
 * rcu_user_exit - inform RCU that we are exiting userspace.
 *
 * Exit RCU idle mode while entering the kernel because it can
 * run a RCU read side critical section anytime.
 *
 * If you add or remove a call to rcu_user_exit(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr rcu_user_exit(void)
{
        rcu_eqs_exit(true);
}
#endif /* #ifdef CONFIG_NO_HZ_FULL */

#ifdef CONFIG_CONTEXT_TRACKING_USER

#define CREATE_TRACE_POINTS