Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:
 "So we have a laundry list of locking subsystem changes:

   - continuing barrier API and code improvements

   - futex enhancements

   - atomics API improvements

   - pvqspinlock enhancements: in particular lock stealing and adaptive
     spinning

   - qspinlock micro-enhancements"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  futex: Allow FUTEX_CLOCK_REALTIME with FUTEX_WAIT op
  futex: Cleanup the goto confusion in requeue_pi()
  futex: Remove pointless put_pi_state calls in requeue()
  futex: Document pi_state refcounting in requeue code
  futex: Rename free_pi_state() to put_pi_state()
  futex: Drop refcount if requeue_pi() acquired the rtmutex
  locking/barriers, arch: Remove ambiguous statement in the smp_store_mb() documentation
  lcoking/barriers, arch: Use smp barriers in smp_store_release()
  locking/cmpxchg, arch: Remove tas() definitions
  locking/pvqspinlock: Queue node adaptive spinning
  locking/pvqspinlock: Allow limited lock stealing
  locking/pvqspinlock: Collect slowpath lock statistics
  sched/core, locking: Document Program-Order guarantees
  locking, sched: Introduce smp_cond_acquire() and use it
  locking/pvqspinlock, x86: Optimize the PV unlock code path
  locking/qspinlock: Avoid redundant read of next pointer
  locking/qspinlock: Prefetch the next node cacheline
  locking/qspinlock: Use _acquire/_release() versions of cmpxchg() & xchg()
  atomics: Add test for atomic operations with _relaxed variants
torvalds committed Jan 11, 2016
2 parents 9061cbe + 337f130 commit 24af98c
Showing 20 changed files with 904 additions and 146 deletions.
4 changes: 2 additions & 2 deletions Documentation/memory-barriers.txt
@@ -1673,8 +1673,8 @@ There are some more advanced barrier functions:
(*) smp_store_mb(var, value)

This assigns the value to the variable and then inserts a full memory
barrier after it, depending on the function. It isn't guaranteed to
insert anything more than a compiler barrier in a UP compilation.
barrier after it. It isn't guaranteed to insert anything more than a
compiler barrier in a UP compilation.


(*) smp_mb__before_atomic();
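A minimal user-space sketch of the smp_store_mb() pattern documented above and defined per architecture in the hunks below: a plain store followed by a full memory barrier, modelled here with C11 atomics. The macro name store_mb() and the variable are illustrative, not part of the patch.

#include <stdatomic.h>

/* Mirrors the kernel definition: WRITE_ONCE(var, value); smp_mb(); */
#define store_mb(var, value)                                        \
        do {                                                        \
                atomic_store_explicit(&(var), (value),              \
                                      memory_order_relaxed);        \
                atomic_thread_fence(memory_order_seq_cst);          \
        } while (0)

static _Atomic int ready;

void publish(void)
{
        store_mb(ready, 1);   /* later loads/stores cannot pass this point */
}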
1 change: 0 additions & 1 deletion arch/blackfin/include/asm/cmpxchg.h
@@ -128,6 +128,5 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
#endif /* !CONFIG_SMP */

#define xchg(ptr, x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
#define tas(ptr) ((void)xchg((ptr), 1))

#endif /* __ARCH_BLACKFIN_CMPXCHG__ */
2 changes: 0 additions & 2 deletions arch/c6x/include/asm/cmpxchg.h
@@ -47,8 +47,6 @@ static inline unsigned int __xchg(unsigned int x, volatile void *ptr, int size)
#define xchg(ptr, x) \
((__typeof__(*(ptr)))__xchg((unsigned int)(x), (void *) (ptr), \
sizeof(*(ptr))))
#define tas(ptr) xchg((ptr), 1)


#include <asm-generic/cmpxchg-local.h>

2 changes: 0 additions & 2 deletions arch/frv/include/asm/cmpxchg.h
@@ -69,8 +69,6 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v);

#endif

#define tas(ptr) (xchg((ptr), 1))

/*****************************************************************************/
/*
* compare and conditionally exchange value with memory
2 changes: 1 addition & 1 deletion arch/ia64/include/asm/barrier.h
@@ -77,7 +77,7 @@ do { \
___p1; \
})

#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)

/*
* The group barrier in front of the rsm & ssm are necessary to ensure
2 changes: 1 addition & 1 deletion arch/powerpc/include/asm/barrier.h
@@ -34,7 +34,7 @@
#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
#define wmb() __asm__ __volatile__ ("sync" : : : "memory")

#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)

#ifdef __SUBARCH_HAS_LWSYNC
# define SMPWMB LWSYNC
2 changes: 1 addition & 1 deletion arch/s390/include/asm/barrier.h
@@ -36,7 +36,7 @@
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()

#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)

#define smp_store_release(p, v) \
do { \
2 changes: 0 additions & 2 deletions arch/tile/include/asm/cmpxchg.h
@@ -127,8 +127,6 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n);

#endif

#define tas(ptr) xchg((ptr), 1)

#endif /* __ASSEMBLY__ */

#endif /* _ASM_TILE_CMPXCHG_H */
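The removed tas() macros above were thin wrappers around xchg(ptr, 1). A caller that still wants the behavior can open-code the exchange; a hedged user-space equivalent with C11 atomics follows (the function name is illustrative, not a kernel symbol).

#include <stdatomic.h>
#include <stdbool.h>

/* Equivalent of the removed tas(ptr): atomically set the word to 1 and
 * report whether it was already non-zero. */
static bool test_and_set_word(_Atomic int *p)
{
        return atomic_exchange(p, 1) != 0;
}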
8 changes: 8 additions & 0 deletions arch/x86/Kconfig
@@ -687,6 +687,14 @@ config PARAVIRT_SPINLOCKS

If you are unsure how to answer this question, answer Y.

config QUEUED_LOCK_STAT
bool "Paravirt queued spinlock statistics"
depends on PARAVIRT_SPINLOCKS && DEBUG_FS && QUEUED_SPINLOCKS
---help---
Enable the collection of statistical data on the slowpath
behavior of paravirtualized queued spinlocks and report
them on debugfs.

source "arch/x86/xen/Kconfig"

config KVM_GUEST
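With CONFIG_QUEUED_LOCK_STAT=y the slowpath counters are reported through debugfs. A hedged sketch of reading one counter from user space; the directory and counter name (/sys/kernel/debug/qlockstat/pv_kick_unlock) are assumptions based on this series, not quoted from the hunk above.

#include <stdio.h>

int main(void)
{
        /* Path and file name are assumed; adjust to what the kernel exposes. */
        const char *path = "/sys/kernel/debug/qlockstat/pv_kick_unlock";
        char buf[64];
        FILE *f = fopen(path, "r");

        if (!f) {
                perror(path);
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("%s: %s", path, buf);
        fclose(f);
        return 0;
}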
59 changes: 59 additions & 0 deletions arch/x86/include/asm/qspinlock_paravirt.h
@@ -1,6 +1,65 @@
#ifndef __ASM_QSPINLOCK_PARAVIRT_H
#define __ASM_QSPINLOCK_PARAVIRT_H

/*
* For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit
* registers. For i386, however, only 1 32-bit register needs to be saved
* and restored. So an optimized version of __pv_queued_spin_unlock() is
* hand-coded for 64-bit, but it isn't worthwhile to do it for 32-bit.
*/
#ifdef CONFIG_64BIT

PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
#define __pv_queued_spin_unlock __pv_queued_spin_unlock
#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock"
#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath"

/*
* Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock
* which combines the registers saving trunk and the body of the following
* C code:
*
* void __pv_queued_spin_unlock(struct qspinlock *lock)
* {
* struct __qspinlock *l = (void *)lock;
* u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
*
* if (likely(lockval == _Q_LOCKED_VAL))
* return;
* pv_queued_spin_unlock_slowpath(lock, lockval);
* }
*
* For x86-64,
* rdi = lock (first argument)
* rsi = lockval (second argument)
* rdx = internal variable (set to 0)
*/
asm (".pushsection .text;"
".globl " PV_UNLOCK ";"
".align 4,0x90;"
PV_UNLOCK ": "
"push %rdx;"
"mov $0x1,%eax;"
"xor %edx,%edx;"
"lock cmpxchg %dl,(%rdi);"
"cmp $0x1,%al;"
"jne .slowpath;"
"pop %rdx;"
"ret;"
".slowpath: "
"push %rsi;"
"movzbl %al,%esi;"
"call " PV_UNLOCK_SLOWPATH ";"
"pop %rsi;"
"pop %rdx;"
"ret;"
".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
".popsection");

#else /* CONFIG_64BIT */

extern void __pv_queued_spin_unlock(struct qspinlock *lock);
PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);

#endif /* CONFIG_64BIT */
#endif
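The hand-written assembly above implements the C fast path quoted in its comment: cmpxchg the lock byte from _Q_LOCKED_VAL back to 0, and only take the slowpath if a different value was found (a queued waiter left a hint behind). A rough portable sketch with GCC atomic builtins; queued_unlock() and unlock_slowpath() are placeholder names, not kernel symbols.

#include <stdbool.h>
#include <stdint.h>

#define Q_LOCKED_VAL 1          /* assumed to mirror _Q_LOCKED_VAL */

extern void unlock_slowpath(uint8_t *locked, uint8_t lockval); /* placeholder */

static void queued_unlock(uint8_t *locked)
{
        uint8_t expected = Q_LOCKED_VAL;

        /* Fast path: clear the lock byte if nobody changed it. */
        if (__atomic_compare_exchange_n(locked, &expected, 0, false,
                                        __ATOMIC_SEQ_CST, __ATOMIC_RELAXED))
                return;

        /* A different value means a waiter is queued: kick it. */
        unlock_slowpath(locked, expected);
}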
2 changes: 1 addition & 1 deletion include/asm-generic/barrier.h
@@ -93,7 +93,7 @@
#endif /* CONFIG_SMP */

#ifndef smp_store_mb
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#endif

#ifndef smp_mb__before_atomic
9 changes: 5 additions & 4 deletions include/asm-generic/qspinlock.h
@@ -12,8 +12,9 @@
* GNU General Public License for more details.
*
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
* (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
*
* Authors: Waiman Long <waiman.long@hp.com>
* Authors: Waiman Long <waiman.long@hpe.com>
*/
#ifndef __ASM_GENERIC_QSPINLOCK_H
#define __ASM_GENERIC_QSPINLOCK_H
@@ -62,7 +63,7 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
static __always_inline int queued_spin_trylock(struct qspinlock *lock)
{
if (!atomic_read(&lock->val) &&
(atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) == 0))
(atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
return 1;
return 0;
}
@@ -77,7 +78,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
u32 val;

val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL);
val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
if (likely(val == 0))
return;
queued_spin_lock_slowpath(lock, val);
@@ -93,7 +94,7 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
/*
* smp_mb__before_atomic() in order to guarantee release semantics
*/
smp_mb__before_atomic_dec();
smp_mb__before_atomic();
atomic_sub(_Q_LOCKED_VAL, &lock->val);
}
#endif
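The switch from atomic_cmpxchg() to atomic_cmpxchg_acquire() above reflects that taking a lock only needs ACQUIRE ordering, while queued_spin_unlock() provides the matching release. A user-space analogue of that fast path using C11 atomics; this is an illustrative sketch, not the kernel implementation.

#include <stdatomic.h>
#include <stdbool.h>

#define LOCKED_VAL 1U           /* stands in for _Q_LOCKED_VAL */

static bool spin_trylock(_Atomic unsigned int *val)
{
        unsigned int expected = 0;

        if (atomic_load_explicit(val, memory_order_relaxed))
                return false;   /* cheap test before the atomic RMW */

        /* ACQUIRE on success: critical-section accesses cannot be
         * reordered before the lock is observed as taken. */
        return atomic_compare_exchange_strong_explicit(val, &expected,
                        LOCKED_VAL, memory_order_acquire,
                        memory_order_relaxed);
}

static void spin_unlock(_Atomic unsigned int *val)
{
        /* RELEASE: critical-section accesses stay before this store. */
        atomic_store_explicit(val, 0, memory_order_release);
}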
17 changes: 17 additions & 0 deletions include/linux/compiler.h
@@ -299,6 +299,23 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
__u.__val; \
})

/**
* smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering
* @cond: boolean expression to wait for
*
* Equivalent to using smp_load_acquire() on the condition variable but employs
* the control dependency of the wait to reduce the barrier on many platforms.
*
* The control dependency provides a LOAD->STORE order, the additional RMB
* provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
* aka. ACQUIRE.
*/
#define smp_cond_acquire(cond) do { \
while (!(cond)) \
cpu_relax(); \
smp_rmb(); /* ctrl + rmb := acquire */ \
} while (0)

#endif /* __KERNEL__ */

#endif /* __ASSEMBLY__ */
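smp_cond_acquire() above turns a busy-wait plus a read barrier into ACQUIRE ordering: the control dependency gives LOAD->STORE order and the smp_rmb() adds LOAD->LOAD. A user-space analogue, with the read barrier approximated by a C11 acquire fence (the function name is illustrative).

#include <stdatomic.h>

/* Spin until *cond becomes non-zero, then behave as if the final load had
 * ACQUIRE semantics, in the spirit of smp_cond_acquire(). */
static void cond_acquire(_Atomic int *cond)
{
        while (!atomic_load_explicit(cond, memory_order_relaxed))
                ;               /* cpu_relax() equivalent omitted */

        atomic_thread_fence(memory_order_acquire);
}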
83 changes: 61 additions & 22 deletions kernel/futex.c
@@ -725,9 +725,12 @@ static struct futex_pi_state * alloc_pi_state(void)
}

/*
* Drops a reference to the pi_state object and frees or caches it
* when the last reference is gone.
*
* Must be called with the hb lock held.
*/
static void free_pi_state(struct futex_pi_state *pi_state)
static void put_pi_state(struct futex_pi_state *pi_state)
{
if (!pi_state)
return;
@@ -1706,31 +1709,35 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
* exist yet, look it up one more time to ensure we have a
* reference to it. If the lock was taken, ret contains the
* vpid of the top waiter task.
* If the lock was not taken, we have pi_state and an initial
* refcount on it. In case of an error we have nothing.
*/
if (ret > 0) {
WARN_ON(pi_state);
drop_count++;
task_count++;
/*
* If we acquired the lock, then the user
* space value of uaddr2 should be vpid. It
* cannot be changed by the top waiter as it
* is blocked on hb2 lock if it tries to do
* so. If something fiddled with it behind our
* back the pi state lookup might unearth
* it. So we rather use the known value than
* rereading and handing potential crap to
* lookup_pi_state.
* If we acquired the lock, then the user space value
* of uaddr2 should be vpid. It cannot be changed by
* the top waiter as it is blocked on hb2 lock if it
* tries to do so. If something fiddled with it behind
* our back the pi state lookup might unearth it. So
* we rather use the known value than rereading and
* handing potential crap to lookup_pi_state.
*
* If that call succeeds then we have pi_state and an
* initial refcount on it.
*/
ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
}

switch (ret) {
case 0:
/* We hold a reference on the pi state. */
break;

/* If the above failed, then pi_state is NULL */
case -EFAULT:
free_pi_state(pi_state);
pi_state = NULL;
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
put_futex_key(&key2);
@@ -1746,8 +1753,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
* exit to complete.
* - The user space value changed.
*/
free_pi_state(pi_state);
pi_state = NULL;
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
put_futex_key(&key2);
@@ -1801,30 +1806,58 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
* of requeue_pi if we couldn't acquire the lock atomically.
*/
if (requeue_pi) {
/* Prepare the waiter to take the rt_mutex. */
/*
* Prepare the waiter to take the rt_mutex. Take a
* refcount on the pi_state and store the pointer in
* the futex_q object of the waiter.
*/
atomic_inc(&pi_state->refcount);
this->pi_state = pi_state;
ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
this->rt_waiter,
this->task);
if (ret == 1) {
/* We got the lock. */
/*
* We got the lock. We do neither drop the
* refcount on pi_state nor clear
* this->pi_state because the waiter needs the
* pi_state for cleaning up the user space
* value. It will drop the refcount after
* doing so.
*/
requeue_pi_wake_futex(this, &key2, hb2);
drop_count++;
continue;
} else if (ret) {
/* -EDEADLK */
/*
* rt_mutex_start_proxy_lock() detected a
* potential deadlock when we tried to queue
* that waiter. Drop the pi_state reference
* which we took above and remove the pointer
* to the state from the waiters futex_q
* object.
*/
this->pi_state = NULL;
free_pi_state(pi_state);
goto out_unlock;
put_pi_state(pi_state);
/*
* We stop queueing more waiters and let user
* space deal with the mess.
*/
break;
}
}
requeue_futex(this, hb1, hb2, &key2);
drop_count++;
}

/*
* We took an extra initial reference to the pi_state either
* in futex_proxy_trylock_atomic() or in lookup_pi_state(). We
* need to drop it here again.
*/
put_pi_state(pi_state);

out_unlock:
free_pi_state(pi_state);
double_unlock_hb(hb1, hb2);
wake_up_q(&wake_q);
hb_waiters_dec(hb2);
@@ -1973,7 +2006,7 @@ static void unqueue_me_pi(struct futex_q *q)
__unqueue_futex(q);

BUG_ON(!q->pi_state);
free_pi_state(q->pi_state);
put_pi_state(q->pi_state);
q->pi_state = NULL;

spin_unlock(q->lock_ptr);
@@ -2755,6 +2788,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (q.pi_state && (q.pi_state->owner != current)) {
spin_lock(q.lock_ptr);
ret = fixup_pi_state_owner(uaddr2, &q, current);
/*
* Drop the reference to the pi state which
* the requeue_pi() code acquired for us.
*/
put_pi_state(q.pi_state);
spin_unlock(q.lock_ptr);
}
} else {
@@ -3046,7 +3084,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,

if (op & FUTEX_CLOCK_REALTIME) {
flags |= FLAGS_CLOCKRT;
if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET && \
cmd != FUTEX_WAIT_REQUEUE_PI)
return -ENOSYS;
}

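The do_futex() hunk above corresponds to "futex: Allow FUTEX_CLOCK_REALTIME with FUTEX_WAIT op": a plain FUTEX_WAIT can now time out against CLOCK_REALTIME. A hedged user-space sketch of issuing such a wait through the raw syscall; on kernels without this change the call fails with ENOSYS, and the exact timeout semantics should be checked against futex(2).

#include <errno.h>
#include <linux/futex.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

static long futex_wait_rt(int *uaddr, int expected,
                          const struct timespec *timeout)
{
        /* FUTEX_PRIVATE_FLAG is assumed appropriate for a process-local
         * futex word; drop it for a shared mapping. */
        return syscall(SYS_futex, uaddr,
                       FUTEX_WAIT | FUTEX_CLOCK_REALTIME | FUTEX_PRIVATE_FLAG,
                       expected, timeout, NULL, 0);
}

int main(void)
{
        int futex_word = 0;
        struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };

        if (futex_wait_rt(&futex_word, 0, &ts) == -1)
                printf("futex: %s\n", strerror(errno));
        return 0;
}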
