locking/barriers: Replace smp_cond_acquire() with smp_cond_load_acquire()

This new form allows using hardware-assisted waiting.

Some hardware (ARM64 and x86) allows monitoring an address for changes,
so by providing a pointer we can use this to replace the cpu_relax()
with hardware-optimized methods in the future.
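
For illustration only, the interface change looks roughly like this at a call site; the struct and wrapper below are hypothetical and not part of this commit:

```c
/*
 * Hypothetical caller, not from this commit. Assumes a struct with an
 * int flag that another CPU eventually sets to non-zero.
 */
struct foo {
        int flag;
};

static void wait_for_flag(struct foo *p)
{
        /*
         * Old form: the primitive only sees a boolean expression, so the
         * generic fallback can do nothing smarter than cpu_relax():
         *
         *      smp_cond_acquire(READ_ONCE(p->flag));
         *
         * New form: the address is explicit and the loaded value is
         * available as VAL, which is what lets an architecture wait on
         * the cacheline instead of spinning.
         */
        smp_cond_load_acquire(&p->flag, VAL);
}
```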

Requested-by: Will Deacon <[email protected]>
Suggested-by: Linus Torvalds <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Paul E. McKenney <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Peter Zijlstra authored and Ingo Molnar committed Jun 14, 2016
1 parent 245050c commit 1f03e8d
Showing 5 changed files with 31 additions and 18 deletions.
25 changes: 19 additions & 6 deletions include/linux/compiler.h
@@ -305,21 +305,34 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
})

/**
- * smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering
+ * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
+ * @ptr: pointer to the variable to wait on
* @cond: boolean expression to wait for
*
* Equivalent to using smp_load_acquire() on the condition variable but employs
* the control dependency of the wait to reduce the barrier on many platforms.
*
+ * Due to C lacking lambda expressions we load the value of *ptr into a
+ * pre-named variable @VAL to be used in @cond.
+ *
* The control dependency provides a LOAD->STORE order, the additional RMB
* provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
* aka. ACQUIRE.
*/
- #define smp_cond_acquire(cond) do { \
-         while (!(cond)) \
-                 cpu_relax(); \
-         smp_rmb(); /* ctrl + rmb := acquire */ \
- } while (0)
+ #ifndef smp_cond_load_acquire
+ #define smp_cond_load_acquire(ptr, cond_expr) ({ \
+         typeof(ptr) __PTR = (ptr); \
+         typeof(*ptr) VAL; \
+         for (;;) { \
+                 VAL = READ_ONCE(*__PTR); \
+                 if (cond_expr) \
+                         break; \
+                 cpu_relax(); \
+         } \
+         smp_rmb(); /* ctrl + rmb := acquire */ \
+         VAL; \
+ })
+ #endif

#endif /* __KERNEL__ */

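As a rough userspace analogue of the generic fallback above (not kernel code; C11 atomics, with the acquire fence standing in for the control dependency plus smp_rmb()):

```c
#include <stdatomic.h>
#include <stdio.h>

/* Spin until *ptr is non-zero, then return that value with acquire
 * ordering -- the same two properties smp_cond_load_acquire() provides:
 * the result is the load that satisfied the condition, and later
 * accesses cannot be reordered before it.
 */
static int cond_load_acquire(atomic_int *ptr)
{
        int val;

        for (;;) {
                val = atomic_load_explicit(ptr, memory_order_relaxed);
                if (val != 0)           /* plays the role of cond_expr */
                        break;
        }
        /* Upgrade the relaxed load, mirroring "ctrl + rmb := acquire". */
        atomic_thread_fence(memory_order_acquire);
        return val;                     /* mirrors the trailing VAL */
}

int main(void)
{
        atomic_int flag = 1;            /* already set, so no spinning */

        printf("loaded %d\n", cond_load_acquire(&flag));
        return 0;
}
```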
12 changes: 6 additions & 6 deletions kernel/locking/qspinlock.c
@@ -475,7 +475,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* sequentiality; this is because not all clear_pending_set_locked()
* implementations imply full barriers.
*/
- smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));
+ smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));

/*
* take ownership and clear the pending bit.
@@ -562,7 +562,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
*
* The PV pv_wait_head_or_lock function, if active, will acquire
* the lock and return a non-zero value. So we have to skip the
- * smp_cond_acquire() call. As the next PV queue head hasn't been
+ * smp_cond_load_acquire() call. As the next PV queue head hasn't been
* designated yet, there is no way for the locked value to become
* _Q_SLOW_VAL. So both the set_locked() and the
* atomic_cmpxchg_relaxed() calls will be safe.
@@ -573,7 +573,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
if ((val = pv_wait_head_or_lock(lock, node)))
goto locked;

- smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK));
+ val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK));

locked:
/*
@@ -593,9 +593,9 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
break;
}
/*
- * The smp_cond_acquire() call above has provided the necessary
- * acquire semantics required for locking. At most two
- * iterations of this loop may be ran.
+ * The smp_cond_load_acquire() call above has provided the
+ * necessary acquire semantics required for locking. At most
+ * two iterations of this loop may be ran.
*/
old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
if (old == val)
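The two qspinlock call sites above pass &lock->val.counter rather than &lock->val because smp_cond_load_acquire() wants a pointer to the scalar it should load, and counter is the plain int inside atomic_t. A sketch of the types involved (as defined in include/linux/types.h and kernel/locking/qspinlock_types.h at the time), shown only to explain that argument:

```c
/* Shape of the types involved; illustrative, not new code in this commit. */
typedef struct {
        int counter;
} atomic_t;

struct qspinlock {
        atomic_t val;
};

/*
 * atomic_read(&lock->val) loads lock->val.counter, so
 *
 *      smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));
 *
 * watches the same int while giving the primitive the address it needs
 * for hardware-assisted waiting; the second call site additionally keeps
 * the returned snapshot in "val".
 */
```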
8 changes: 4 additions & 4 deletions kernel/sched/core.c
@@ -1935,7 +1935,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
* chain to provide order. Instead we do:
*
* 1) smp_store_release(X->on_cpu, 0)
- * 2) smp_cond_acquire(!X->on_cpu)
+ * 2) smp_cond_load_acquire(!X->on_cpu)
*
* Example:
*
@@ -1946,7 +1946,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
* sched-out X
* smp_store_release(X->on_cpu, 0);
*
- * smp_cond_acquire(!X->on_cpu);
+ * smp_cond_load_acquire(&X->on_cpu, !VAL);
* X->state = WAKING
* set_task_cpu(X,2)
*
@@ -1972,7 +1972,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
* This means that any means of doing remote wakeups must order the CPU doing
* the wakeup against the CPU the task is going to end up running on. This,
* however, is already required for the regular Program-Order guarantee above,
- * since the waking CPU is the one issueing the ACQUIRE (smp_cond_acquire).
+ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire).
*
*/

@@ -2045,7 +2045,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* This ensures that tasks getting woken will be fully ordered against
* their previous state and preserve Program Order.
*/
- smp_cond_acquire(!p->on_cpu);
+ smp_cond_load_acquire(&p->on_cpu, !VAL);

p->sched_contributes_to_load = !!task_contributes_to_load(p);
p->state = TASK_WAKING;
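A reduced sketch of the on_cpu handshake those comments describe; the struct and functions below are illustrative only, not the actual scheduler code:

```c
/* Reduced model of the on_cpu pairing; illustrative only. */
struct task_model {
        int data;       /* stands for all state written while the task ran */
        int on_cpu;
};

/* CPU scheduling the task out (cf. finish_lock_switch()). */
static void sched_out_side(struct task_model *p)
{
        p->data = 42;                           /* work done while on the CPU */
        smp_store_release(&p->on_cpu, 0);       /* publish it, then mark off-CPU */
}

/* CPU performing the wakeup (cf. try_to_wake_up()). */
static void wakeup_side(struct task_model *p)
{
        smp_cond_load_acquire(&p->on_cpu, !VAL);
        /* The ACQUIRE pairs with the store-release above: p->data == 42
         * is guaranteed to be visible here before the wakeup proceeds. */
}
```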
2 changes: 1 addition & 1 deletion kernel/sched/sched.h
@@ -1113,7 +1113,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
* In particular, the load of prev->state in finish_task_switch() must
* happen before this.
*
- * Pairs with the smp_cond_acquire() in try_to_wake_up().
+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up().
*/
smp_store_release(&prev->on_cpu, 0);
#endif
2 changes: 1 addition & 1 deletion kernel/smp.c
@@ -107,7 +107,7 @@ void __init call_function_init(void)
*/
static __always_inline void csd_lock_wait(struct call_single_data *csd)
{
- smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
+ smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
}

static __always_inline void csd_lock(struct call_single_data *csd)
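For context (not part of this diff), the acquire in csd_lock_wait() pairs with a release store that clears CSD_FLAG_LOCK on the unlock path in the same file; schematically:

```c
/* Schematic unlock side (cf. csd_unlock() in kernel/smp.c; simplified,
 * not part of this diff). The release store below is what the
 * smp_cond_load_acquire() in csd_lock_wait() pairs with, so a waiter
 * that sees the flag clear also sees every write made while the csd
 * was in flight.
 */
static __always_inline void csd_unlock_sketch(struct call_single_data *csd)
{
        smp_store_release(&csd->flags, 0);
}
```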
