kcsan: Add core support for a subset of weak memory modeling
Add support for modeling a subset of weak memory, which will enable
detection of a subset of data races due to missing memory barriers.

KCSAN's approach to detecting missing memory barriers is based on
modeling access reordering, and is enabled if `CONFIG_KCSAN_WEAK_MEMORY=y`,
which depends on `CONFIG_KCSAN_STRICT=y`. The feature can be enabled or
disabled at boot and runtime via the `kcsan.weak_memory` boot parameter.

Each memory access for which a watchpoint is set up is also selected
for simulated reordering within the scope of its function (at most 1
in-flight access).

We are limited to modeling the effects of "buffering" (delaying the
access), since the runtime cannot "prefetch" accesses (therefore no
acquire modeling). Once an access has been selected for reordering, it
is checked against every other access until the end of the function scope.
If an appropriate memory barrier is encountered, the access will no
longer be considered for reordering.
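
For illustration, a hypothetical writer (shared_data, published and
publish() are made-up names, not from this patch; smp_wmb() and
WRITE_ONCE() are the usual kernel primitives) might look like this:

  int shared_data;
  int published;

  void publish(void)
  {
          /*
           * If a watchpoint is set up for this plain store, it is also
           * selected as the in-flight reorder_access and re-checked at
           * every later access in this function, as if the store had
           * been buffered (delayed) to that point.
           */
          shared_data = 42;

          /* A barrier ends simulated reordering of shared_data. */
          smp_wmb();

          WRITE_ONCE(published, 1);
  }

Without the smp_wmb(), the store to shared_data is modeled as if delayed
past the store to published, mirroring a reordering the compiler or CPU
could legally perform.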

When the result of a memory operation should be ordered by a barrier,
KCSAN can then detect data races where the conflict only occurs because a
missing barrier allowed the access to be reordered.
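
As a hedged example of the bug class this detects (again hypothetical
code; data, ready, producer() and consumer() are made-up names):

  int data;
  int ready;

  void producer(void)
  {
          data = 1;               /* missing smp_wmb()/release ordering...    */
          WRITE_ONCE(ready, 1);   /* ...so this store may become visible first */
  }

  int consumer(void)
  {
          if (READ_ONCE(ready))   /* missing smp_rmb()/acquire pairing */
                  return data;    /* plain read of data */
          return 0;
  }

Because the runtime can only buffer accesses, the race would typically be
flagged from the producer side: the plain store to data is kept as the
reorder_access past WRITE_ONCE(ready, 1), where it can be found to
conflict with consumer()'s plain read of data. Adding the missing
barriers (or using smp_store_release()/smp_load_acquire()) ends the
simulated reordering and the report.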

Suggested-by: Dmitry Vyukov <[email protected]>
Signed-off-by: Marco Elver <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
melver authored and paulmckrcu committed Dec 10, 2021
1 parent 9756f64 commit 69562e4
Showing 6 changed files with 235 additions and 19 deletions.
10 changes: 9 additions & 1 deletion include/linux/kcsan-checks.h
@@ -99,7 +99,15 @@ void kcsan_set_access_mask(unsigned long mask);

/* Scoped access information. */
struct kcsan_scoped_access {
struct list_head list;
union {
struct list_head list; /* scoped_accesses list */
/*
* Not an entry in scoped_accesses list; stack depth from where
* the access was initialized.
*/
int stack_depth;
};

/* Access information. */
const volatile void *ptr;
size_t size;
10 changes: 9 additions & 1 deletion include/linux/kcsan.h
@@ -49,8 +49,16 @@ struct kcsan_ctx {
*/
unsigned long access_mask;

/* List of scoped accesses. */
/* List of scoped accesses; likely to be empty. */
struct list_head scoped_accesses;

#ifdef CONFIG_KCSAN_WEAK_MEMORY
/*
* Scoped access for modeling access reordering to detect missing memory
* barriers; only keep 1 to keep fast-path complexity manageable.
*/
struct kcsan_scoped_access reorder_access;
#endif
};

/**
3 changes: 3 additions & 0 deletions include/linux/sched.h
@@ -1339,6 +1339,9 @@ struct task_struct {
#ifdef CONFIG_TRACE_IRQFLAGS
struct irqtrace_events kcsan_save_irqtrace;
#endif
#ifdef CONFIG_KCSAN_WEAK_MEMORY
int kcsan_stack_depth;
#endif
#endif

#if IS_ENABLED(CONFIG_KUNIT)
202 changes: 187 additions & 15 deletions kernel/kcsan/core.c
@@ -40,6 +40,13 @@ module_param_named(udelay_interrupt, kcsan_udelay_interrupt, uint, 0644);
module_param_named(skip_watch, kcsan_skip_watch, long, 0644);
module_param_named(interrupt_watcher, kcsan_interrupt_watcher, bool, 0444);

#ifdef CONFIG_KCSAN_WEAK_MEMORY
static bool kcsan_weak_memory = true;
module_param_named(weak_memory, kcsan_weak_memory, bool, 0644);
#else
#define kcsan_weak_memory false
#endif

bool kcsan_enabled;

/* Per-CPU kcsan_ctx for interrupts */
@@ -351,6 +358,67 @@ void kcsan_restore_irqtrace(struct task_struct *task)
#endif
}

static __always_inline int get_kcsan_stack_depth(void)
{
#ifdef CONFIG_KCSAN_WEAK_MEMORY
return current->kcsan_stack_depth;
#else
BUILD_BUG();
return 0;
#endif
}

static __always_inline void add_kcsan_stack_depth(int val)
{
#ifdef CONFIG_KCSAN_WEAK_MEMORY
current->kcsan_stack_depth += val;
#else
BUILD_BUG();
#endif
}

static __always_inline struct kcsan_scoped_access *get_reorder_access(struct kcsan_ctx *ctx)
{
#ifdef CONFIG_KCSAN_WEAK_MEMORY
return ctx->disable_scoped ? NULL : &ctx->reorder_access;
#else
return NULL;
#endif
}

static __always_inline bool
find_reorder_access(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size,
int type, unsigned long ip)
{
struct kcsan_scoped_access *reorder_access = get_reorder_access(ctx);

if (!reorder_access)
return false;

/*
* Note: If accesses are repeated while reorder_access is identical,
* never matches the new access, because !(type & KCSAN_ACCESS_SCOPED).
*/
return reorder_access->ptr == ptr && reorder_access->size == size &&
reorder_access->type == type && reorder_access->ip == ip;
}

static inline void
set_reorder_access(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size,
int type, unsigned long ip)
{
struct kcsan_scoped_access *reorder_access = get_reorder_access(ctx);

if (!reorder_access || !kcsan_weak_memory)
return;

reorder_access->ptr = ptr;
reorder_access->size = size;
reorder_access->type = type | KCSAN_ACCESS_SCOPED;
reorder_access->ip = ip;
reorder_access->stack_depth = get_kcsan_stack_depth();
}

/*
* Pull everything together: check_access() below contains the performance
* critical operations; the fast-path (including check_access) functions should
@@ -389,8 +457,10 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
* The access_mask check relies on value-change comparison. To avoid
* reporting a race where e.g. the writer set up the watchpoint, but the
* reader has access_mask!=0, we have to ignore the found watchpoint.
*
* reorder_access is never created from an access with access_mask set.
*/
if (ctx->access_mask)
if (ctx->access_mask && !find_reorder_access(ctx, ptr, size, type, ip))
return;

/*
@@ -440,11 +510,13 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned
const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0;
atomic_long_t *watchpoint;
u64 old, new, diff;
unsigned long access_mask;
enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE;
bool interrupt_watcher = kcsan_interrupt_watcher;
unsigned long ua_flags = user_access_save();
struct kcsan_ctx *ctx = get_ctx();
unsigned long access_mask = ctx->access_mask;
unsigned long irq_flags = 0;
bool is_reorder_access;

/*
* Always reset kcsan_skip counter in slow-path to avoid underflow; see
@@ -467,6 +539,17 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned
goto out;
}

/*
* The local CPU cannot observe reordering of its own accesses, and
* therefore we need to take care of 2 cases to avoid false positives:
*
* 1. Races of the reordered access with interrupts. To avoid, if
* the current access is reorder_access, disable interrupts.
* 2. Avoid races of scoped accesses from nested interrupts (below).
*/
is_reorder_access = find_reorder_access(ctx, ptr, size, type, ip);
if (is_reorder_access)
interrupt_watcher = false;
/*
* Avoid races of scoped accesses from nested interrupts (or scheduler).
* Assume setting up a watchpoint for a non-scoped (normal) access that
@@ -482,7 +565,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned
* information is lost if dirtied by KCSAN.
*/
kcsan_save_irqtrace(current);
if (!kcsan_interrupt_watcher)
if (!interrupt_watcher)
local_irq_save(irq_flags);

watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
@@ -503,7 +586,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned
* Read the current value, to later check and infer a race if the data
* was modified via a non-instrumented access, e.g. from a device.
*/
old = read_instrumented_memory(ptr, size);
old = is_reorder_access ? 0 : read_instrumented_memory(ptr, size);

/*
* Delay this thread, to increase probability of observing a racy
@@ -515,8 +598,17 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned
* Re-read value, and check if it is as expected; if not, we infer a
* racy access.
*/
access_mask = ctx->access_mask;
new = read_instrumented_memory(ptr, size);
if (!is_reorder_access) {
new = read_instrumented_memory(ptr, size);
} else {
/*
* Reordered accesses cannot be used for value change detection,
* because the memory location may no longer be accessible and
* could result in a fault.
*/
new = 0;
access_mask = 0;
}

diff = old ^ new;
if (access_mask)
@@ -585,19 +677,27 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned
*/
remove_watchpoint(watchpoint);
atomic_long_dec(&kcsan_counters[KCSAN_COUNTER_USED_WATCHPOINTS]);

out_unlock:
if (!kcsan_interrupt_watcher)
if (!interrupt_watcher)
local_irq_restore(irq_flags);
kcsan_restore_irqtrace(current);
ctx->disable_scoped--;

/*
* Reordered accesses cannot be used for value change detection,
* therefore never consider for reordering if access_mask is set.
* ASSERT_EXCLUSIVE are not real accesses, ignore them as well.
*/
if (!access_mask && !is_assert)
set_reorder_access(ctx, ptr, size, type, ip);
out:
user_access_restore(ua_flags);
}

static __always_inline void
check_access(const volatile void *ptr, size_t size, int type, unsigned long ip)
{
const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0;
atomic_long_t *watchpoint;
long encoded_watchpoint;

@@ -608,12 +708,14 @@ check_access(const volatile void *ptr, size_t size, int type, unsigned long ip)
if (unlikely(size == 0))
return;

again:
/*
* Avoid user_access_save in fast-path: find_watchpoint is safe without
* user_access_save, as the address that ptr points to is only used to
* check if a watchpoint exists; ptr is never dereferenced.
*/
watchpoint = find_watchpoint((unsigned long)ptr, size, !is_write,
watchpoint = find_watchpoint((unsigned long)ptr, size,
!(type & KCSAN_ACCESS_WRITE),
&encoded_watchpoint);
/*
* It is safe to check kcsan_is_enabled() after find_watchpoint in the
@@ -627,9 +729,42 @@ check_access(const volatile void *ptr, size_t size, int type, unsigned long ip)
else {
struct kcsan_ctx *ctx = get_ctx(); /* Call only once in fast-path. */

if (unlikely(should_watch(ctx, ptr, size, type)))
if (unlikely(should_watch(ctx, ptr, size, type))) {
kcsan_setup_watchpoint(ptr, size, type, ip);
else if (unlikely(ctx->scoped_accesses.prev))
return;
}

if (!(type & KCSAN_ACCESS_SCOPED)) {
struct kcsan_scoped_access *reorder_access = get_reorder_access(ctx);

if (reorder_access) {
/*
* reorder_access check: simulates reordering of
* the access after subsequent operations.
*/
ptr = reorder_access->ptr;
type = reorder_access->type;
ip = reorder_access->ip;
/*
* Upon a nested interrupt, this context's
* reorder_access can be modified (shared ctx).
* We know that upon return, reorder_access is
* always invalidated by setting size to 0 via
* __tsan_func_exit(). Therefore we must read
* and check size after the other fields.
*/
barrier();
size = READ_ONCE(reorder_access->size);
if (size)
goto again;
}
}

/*
* Always checked last, right before returning from runtime;
* if reorder_access is valid, checked after it was checked.
*/
if (unlikely(ctx->scoped_accesses.prev))
kcsan_check_scoped_accesses();
}
}
@@ -916,19 +1051,56 @@ DEFINE_TSAN_VOLATILE_READ_WRITE(8);
DEFINE_TSAN_VOLATILE_READ_WRITE(16);

/*
* The below are not required by KCSAN, but can still be emitted by the
* compiler.
* Function entry and exit are used to determine the validity of reorder_access.
* Reordering of the access ends at the end of the function scope where the
* access happened. This is done for two reasons:
*
* 1. Artificially limits the scope where missing barriers are detected.
* This minimizes false positives due to uninstrumented functions that
* contain the required barriers but were missed.
*
* 2. Simplifies generating the stack trace of the access.
*/
void __tsan_func_entry(void *call_pc);
void __tsan_func_entry(void *call_pc)
noinline void __tsan_func_entry(void *call_pc)
{
if (!IS_ENABLED(CONFIG_KCSAN_WEAK_MEMORY))
return;

add_kcsan_stack_depth(1);
}
EXPORT_SYMBOL(__tsan_func_entry);

void __tsan_func_exit(void);
void __tsan_func_exit(void)
noinline void __tsan_func_exit(void)
{
struct kcsan_scoped_access *reorder_access;

if (!IS_ENABLED(CONFIG_KCSAN_WEAK_MEMORY))
return;

reorder_access = get_reorder_access(get_ctx());
if (!reorder_access)
goto out;

if (get_kcsan_stack_depth() <= reorder_access->stack_depth) {
/*
* Access check to catch cases where write without a barrier
* (supposed release) was last access in function: because
* instrumentation is inserted before the real access, a data
race due to the write giving up a critical section would only be caught if
* we do the conflicting access after.
*/
check_access(reorder_access->ptr, reorder_access->size,
reorder_access->type, reorder_access->ip);
reorder_access->size = 0;
reorder_access->stack_depth = INT_MIN;
}
out:
add_kcsan_stack_depth(-1);
}
EXPORT_SYMBOL(__tsan_func_exit);

void __tsan_init(void);
void __tsan_init(void)
{
20 changes: 20 additions & 0 deletions lib/Kconfig.kcsan
@@ -191,6 +191,26 @@ config KCSAN_STRICT
closely aligns with the rules defined by the Linux-kernel memory
consistency model (LKMM).

config KCSAN_WEAK_MEMORY
bool "Enable weak memory modeling to detect missing memory barriers"
default y
depends on KCSAN_STRICT
# We can either let objtool nop __tsan_func_{entry,exit}() and builtin
# atomics instrumentation in .noinstr.text, or use a compiler that can
# implement __no_kcsan to really remove all instrumentation.
depends on STACK_VALIDATION || CC_IS_GCC
help
Enable support for modeling a subset of weak memory, which allows
detecting a subset of data races due to missing memory barriers.

Depends on KCSAN_STRICT, because the options strengthening certain
plain accesses by default (depending on !KCSAN_STRICT) reduce the
ability to detect any data races involving reordered accesses, in
particular reordered writes.

Weak memory modeling relies on additional instrumentation and may
affect performance.

config KCSAN_REPORT_VALUE_CHANGE_ONLY
bool "Only report races where watcher observed a data value change"
default y