Skip to content

Commit

Permalink
membarrier: Provide core serializing command, *_SYNC_CORE
Browse files Browse the repository at this point in the history
Provide core serializing membarrier command to support memory reclaim
by JIT.

Each architecture needs to explicitly opt into that support by
documenting in their architecture code how they provide the core
serializing instructions required when returning from the membarrier
IPI, and after the scheduler has updated the curr->mm pointer (before
going back to user-space). They should then select
ARCH_HAS_MEMBARRIER_SYNC_CORE to enable support for that command on
their architecture.

Architectures selecting this feature need to either document that
they issue core serializing instructions when returning to user-space,
or implement their architecture-specific sync_core_before_usermode().

Signed-off-by: Mathieu Desnoyers <[email protected]>
Acked-by: Thomas Gleixner <[email protected]>
Acked-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Andrea Parri <[email protected]>
Cc: Andrew Hunter <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Avi Kivity <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Boqun Feng <[email protected]>
Cc: Dave Watson <[email protected]>
Cc: David Sehr <[email protected]>
Cc: Greg Hackmann <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Maged Michael <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Paul E. McKenney <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Russell King <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
  • Loading branch information
compudj authored and Ingo Molnar committed Feb 5, 2018
1 parent ac1ab12 commit 70216e1
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 18 deletions.
18 changes: 18 additions & 0 deletions include/linux/sched/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/gfp.h>
#include <linux/sync_core.h>

/*
* Routines for handling mm_structs
Expand Down Expand Up @@ -223,12 +224,26 @@ enum {
MEMBARRIER_STATE_PRIVATE_EXPEDITED = (1U << 1),
MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY = (1U << 2),
MEMBARRIER_STATE_GLOBAL_EXPEDITED = (1U << 3),
MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY = (1U << 4),
MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE = (1U << 5),
};

/*
 * Flag bits for the "flags" argument taken by
 * membarrier_private_expedited() and
 * membarrier_register_private_expedited().
 */
enum {
/* Request the SYNC_CORE variant of the private expedited command. */
MEMBARRIER_FLAG_SYNC_CORE = (1U << 0),
};

#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
#include <asm/membarrier.h>
#endif

/*
 * Call sync_core_before_usermode() when @mm has the
 * MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE bit set in its
 * membarrier_state. The bit being clear is treated as the expected
 * case, in which this function does nothing.
 */
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
	int sync_core_requested;

	sync_core_requested = atomic_read(&mm->membarrier_state) &
			      MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
	if (likely(!sync_core_requested))
		return;

	sync_core_before_usermode();
}

static inline void membarrier_execve(struct task_struct *t)
{
atomic_set(&t->mm->membarrier_state, 0);
Expand All @@ -244,6 +259,9 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
/*
 * No-op variant of membarrier_execve() for the alternate preprocessor
 * branch. NOTE(review): the guarding #if condition is outside this hunk;
 * confirm which configuration selects this stub.
 */
static inline void membarrier_execve(struct task_struct *t)
{
}
/*
 * No-op variant of membarrier_mm_sync_core_before_usermode() for the
 * alternate preprocessor branch, so callers need no #ifdef of their own.
 * NOTE(review): the guarding #if condition is outside this hunk; confirm
 * which configuration selects this stub.
 */
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
}
#endif

#endif /* _LINUX_SCHED_MM_H */
32 changes: 31 additions & 1 deletion include/uapi/linux/membarrier.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
* to and return from the system call
* (non-running threads are de facto in such a
* state). This only covers threads from the
* same processes as the caller thread. This
* same process as the caller thread. This
* command returns 0 on success. The
* "expedited" commands complete faster than
* the non-expedited ones, they never block,
Expand All @@ -86,6 +86,34 @@
* Register the process intent to use
* MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always
* returns 0.
* @MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
* In addition to providing the memory ordering
* guarantees described in
* MEMBARRIER_CMD_PRIVATE_EXPEDITED, guarantee to
* the caller thread, upon return from the system
* call, that all its running thread siblings
* have executed a core serializing
* instruction. (Architectures are required to
* guarantee that non-running threads issue
* core serializing instructions before they
* resume user-space execution). This only
* covers threads from the same process as the
* caller thread. This command returns 0 on
* success. The "expedited" commands complete
* faster than the non-expedited ones, they
* never block, but have the downside of
* causing extra overhead. If this command is
* not implemented by an architecture, -EINVAL
* is returned. A process needs to register its
* intent to use the private expedited sync
* core command prior to using it, otherwise
* this command returns -EPERM.
* @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
* Register the process intent to use
* MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE.
* If this command is not implemented by an
* architecture, -EINVAL is returned.
* Returns 0 on success.
* @MEMBARRIER_CMD_SHARED:
* Alias to MEMBARRIER_CMD_GLOBAL. Provided for
* header backward compatibility.
Expand All @@ -101,6 +129,8 @@ enum membarrier_cmd {
MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED = (1 << 2),
MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3),
MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4),
MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE = (1 << 5),
MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE = (1 << 6),

/* Alias for header backward compatibility. */
MEMBARRIER_CMD_SHARED = MEMBARRIER_CMD_GLOBAL,
Expand Down
3 changes: 3 additions & 0 deletions init/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -1415,6 +1415,9 @@ config USERFAULTFD
config ARCH_HAS_MEMBARRIER_CALLBACKS
bool

# Selected by architectures that document how they issue core serializing
# instructions before returning to user-space; enables the membarrier
# *_SYNC_CORE commands.
config ARCH_HAS_MEMBARRIER_SYNC_CORE
bool

config EMBEDDED
bool "Embedded system"
option allnoconfig_y
Expand Down
18 changes: 13 additions & 5 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2704,13 +2704,21 @@ static struct rq *finish_task_switch(struct task_struct *prev)

fire_sched_in_preempt_notifiers(current);
/*
* When transitioning from a kernel thread to a userspace
* thread, mmdrop()'s implicit full barrier is required by the
* membarrier system call, because the current ->active_mm can
* become the current mm without going through switch_mm().
* When switching through a kernel thread, the loop in
* membarrier_{private,global}_expedited() may have observed that
* kernel thread and not issued an IPI. It is therefore possible to
* schedule between user->kernel->user threads without passing through
* switch_mm(). Membarrier requires a barrier after storing to
* rq->curr, before returning to userspace, so provide them here:
*
* - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly
* provided by mmdrop(),
* - a sync_core for SYNC_CORE.
*/
if (mm)
if (mm) {
membarrier_mm_sync_core_before_usermode(mm);
mmdrop(mm);
}
if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
Expand Down
53 changes: 41 additions & 12 deletions kernel/sched/membarrier.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,20 @@
* Bitmask made from an "or" of all commands within enum membarrier_cmd,
* except MEMBARRIER_CMD_QUERY.
*/
/*
 * The two SYNC_CORE command bits contribute to MEMBARRIER_CMD_BITMASK only
 * when CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE is set; otherwise this expands
 * to 0 and adds nothing to the mask.
 */
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \
(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE \
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
#else
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK 0
#endif

#define MEMBARRIER_CMD_BITMASK \
(MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \
| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
| MEMBARRIER_CMD_PRIVATE_EXPEDITED \
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
| MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)

static void ipi_mb(void *info)
{
Expand Down Expand Up @@ -104,15 +113,23 @@ static int membarrier_global_expedited(void)
return 0;
}

static int membarrier_private_expedited(void)
static int membarrier_private_expedited(int flags)
{
int cpu;
bool fallback = false;
cpumask_var_t tmpmask;

if (!(atomic_read(&current->mm->membarrier_state)
& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
return -EPERM;
if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
return -EINVAL;
if (!(atomic_read(&current->mm->membarrier_state) &
MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
return -EPERM;
} else {
if (!(atomic_read(&current->mm->membarrier_state) &
MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
return -EPERM;
}

if (num_online_cpus() == 1)
return 0;
Expand Down Expand Up @@ -205,29 +222,37 @@ static int membarrier_register_global_expedited(void)
return 0;
}

static int membarrier_register_private_expedited(void)
static int membarrier_register_private_expedited(int flags)
{
struct task_struct *p = current;
struct mm_struct *mm = p->mm;
int state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY;

if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
return -EINVAL;
state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
}

/*
* We need to consider threads belonging to different thread
* groups, which use the same mm. (CLONE_VM but not
* CLONE_THREAD).
*/
if (atomic_read(&mm->membarrier_state)
& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
if (atomic_read(&mm->membarrier_state) & state)
return 0;
atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
if (flags & MEMBARRIER_FLAG_SYNC_CORE)
atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE,
&mm->membarrier_state);
if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
/*
* Ensure all future scheduler executions will observe the
* new thread flag state for this process.
*/
synchronize_sched();
}
atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
&mm->membarrier_state);
atomic_or(state, &mm->membarrier_state);
return 0;
}

Expand Down Expand Up @@ -283,9 +308,13 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
return membarrier_register_global_expedited();
case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
return membarrier_private_expedited();
return membarrier_private_expedited(0);
case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
return membarrier_register_private_expedited();
return membarrier_register_private_expedited(0);
case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
return membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
default:
return -EINVAL;
}
Expand Down

0 comments on commit 70216e1

Please sign in to comment.