Skip to content

Commit

Permalink
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm…
Browse files Browse the repository at this point in the history
…/linux/kernel/git/tip/tip

Pull core locking updates from Ingo Molnar:
 "The main changes are:

   - mutex, completions and rtmutex micro-optimizations
   - lock debugging fix
   - various cleanups in the MCS and the futex code"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  locking/rtmutex: Optimize setting task running after being blocked
  locking/rwsem: Use task->state helpers
  sched/completion: Add lock-free checking of the blocking case
  sched/completion: Remove unnecessary ->wait.lock serialization when reading completion state
  locking/mutex: Explicitly mark task as running after wakeup
  futex: Fix argument handling in futex_lock_pi() calls
  doc: Fix misnamed FUTEX_CMP_REQUEUE_PI op constants
  locking/Documentation: Update code path
  softirq/preempt: Add missing current->preempt_disable_ip update
  locking/osq: No need for load/acquire when acquire-polling
  locking/mcs: Better differentiate between MCS variants
  locking/mutex: Introduce ww_mutex_set_context_slowpath()
  locking/mutex: Move MCS related comments to proper location
  locking/mutex: Checking the stamp is WW only
  • Loading branch information
torvalds committed Feb 9, 2015
2 parents 23e8fe2 + afffc6c commit 8308756
Show file tree
Hide file tree
Showing 14 changed files with 78 additions and 78 deletions.
8 changes: 4 additions & 4 deletions Documentation/futex-requeue-pi.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock(), which
allow the requeue code to acquire an uncontended rt_mutex on behalf
of the waiter and to enqueue the waiter on a contended rt_mutex.
Two new system calls provide the kernel<->user interface to
requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_REQUEUE_CMP_PI.
requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI.

FUTEX_WAIT_REQUEUE_PI is called by the waiter (pthread_cond_wait()
and pthread_cond_timedwait()) to block on the initial futex and wait
Expand All @@ -107,7 +107,7 @@ result of a high-speed collision between futex_wait() and
futex_lock_pi(), with some extra logic to check for the additional
wake-up scenarios.

FUTEX_REQUEUE_CMP_PI is called by the waker
FUTEX_CMP_REQUEUE_PI is called by the waker
(pthread_cond_broadcast() and pthread_cond_signal()) to requeue and
possibly wake the waiting tasks. Internally, this system call is
still handled by futex_requeue (by passing requeue_pi=1). Before
Expand All @@ -120,12 +120,12 @@ task as a waiter on the underlying rt_mutex. It is possible that
the lock can be acquired at this stage as well, if so, the next
waiter is woken to finish the acquisition of the lock.

FUTEX_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but
FUTEX_CMP_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but
their sum is all that really matters. futex_requeue() will wake or
requeue up to nr_wake + nr_requeue tasks. It will wake only as many
tasks as it can acquire the lock for, which in the majority of cases
should be 0 as good programming practice dictates that the caller of
either pthread_cond_broadcast() or pthread_cond_signal() acquire the
mutex prior to making the call. FUTEX_REQUEUE_PI requires that
mutex prior to making the call. FUTEX_CMP_REQUEUE_PI requires that
nr_wake=1. nr_requeue should be INT_MAX for broadcast and 0 for
signal.
2 changes: 1 addition & 1 deletion Documentation/locking/lockdep-design.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ The validator tracks lock-class usage history into 4n + 1 separate state bits:
- 'ever held with STATE enabled'
- 'ever held as readlock with STATE enabled'

Where STATE can be either one of (kernel/lockdep_states.h)
Where STATE can be either one of (kernel/locking/lockdep_states.h)
- hardirq
- softirq
- reclaim_fs
Expand Down
12 changes: 10 additions & 2 deletions include/linux/osq_lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
* An MCS like lock especially tailored for optimistic spinning for sleeping
* lock implementations (mutex, rwsem, etc).
*/

#define OSQ_UNLOCKED_VAL (0)
struct optimistic_spin_node {
struct optimistic_spin_node *next, *prev;
int locked; /* 1 if lock acquired */
int cpu; /* encoded CPU # + 1 value */
};

struct optimistic_spin_queue {
/*
Expand All @@ -16,6 +19,8 @@ struct optimistic_spin_queue {
atomic_t tail;
};

#define OSQ_UNLOCKED_VAL (0)

/* Init macro and function. */
#define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) }

Expand All @@ -24,4 +29,7 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock)
atomic_set(&lock->tail, OSQ_UNLOCKED_VAL);
}

extern bool osq_lock(struct optimistic_spin_queue *lock);
extern void osq_unlock(struct optimistic_spin_queue *lock);

#endif
4 changes: 4 additions & 0 deletions kernel/Kconfig.locks
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,10 @@ config RWSEM_SPIN_ON_OWNER
def_bool y
depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW

config LOCK_SPIN_ON_OWNER
def_bool y
depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER

config ARCH_USE_QUEUE_RWLOCK
bool

Expand Down
6 changes: 3 additions & 3 deletions kernel/futex.c
Original file line number Diff line number Diff line change
Expand Up @@ -2258,7 +2258,7 @@ static long futex_wait_restart(struct restart_block *restart)
* if there are waiters then it will block, it does PI, etc. (Due to
* races the kernel might see a 0 value of the futex too.)
*/
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
ktime_t *time, int trylock)
{
struct hrtimer_sleeper timeout, *to = NULL;
Expand Down Expand Up @@ -2953,11 +2953,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
case FUTEX_WAKE_OP:
return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
case FUTEX_LOCK_PI:
return futex_lock_pi(uaddr, flags, val, timeout, 0);
return futex_lock_pi(uaddr, flags, timeout, 0);
case FUTEX_UNLOCK_PI:
return futex_unlock_pi(uaddr, flags);
case FUTEX_TRYLOCK_PI:
return futex_lock_pi(uaddr, flags, 0, timeout, 1);
return futex_lock_pi(uaddr, flags, NULL, 1);
case FUTEX_WAIT_REQUEUE_PI:
val3 = FUTEX_BITSET_MATCH_ANY;
return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
Expand Down
3 changes: 2 additions & 1 deletion kernel/locking/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o
obj-y += mutex.o semaphore.o rwsem.o

ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = -pg
Expand All @@ -14,6 +14,7 @@ ifeq ($(CONFIG_PROC_FS),y)
obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
endif
obj-$(CONFIG_SMP) += spinlock.o
obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
obj-$(CONFIG_SMP) += lglock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
Expand Down
16 changes: 0 additions & 16 deletions kernel/locking/mcs_spinlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,20 +108,4 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
arch_mcs_spin_unlock_contended(&next->locked);
}

/*
* Cancellable version of the MCS lock above.
*
* Intended for adaptive spinning of sleeping locks:
* mutex_lock()/rwsem_down_{read,write}() etc.
*/

struct optimistic_spin_node {
struct optimistic_spin_node *next, *prev;
int locked; /* 1 if lock acquired */
int cpu; /* encoded CPU # value */
};

extern bool osq_lock(struct optimistic_spin_queue *lock);
extern void osq_unlock(struct optimistic_spin_queue *lock);

#endif /* __LINUX_MCS_SPINLOCK_H */
60 changes: 32 additions & 28 deletions kernel/locking/mutex.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
}

/*
* after acquiring lock with fastpath or when we lost out in contested
* After acquiring lock with fastpath or when we lost out in contested
* slowpath, set ctx and wake up any waiters so they can recheck.
*
* This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set,
Expand Down Expand Up @@ -191,19 +191,32 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock,
spin_unlock_mutex(&lock->base.wait_lock, flags);
}


#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
* In order to avoid a stampede of mutex spinners from acquiring the mutex
* more or less simultaneously, the spinners need to acquire a MCS lock
* first before spinning on the owner field.
* After acquiring lock in the slowpath set ctx and wake up any
* waiters so they can recheck.
*
* Callers must hold the mutex wait_lock.
*/
static __always_inline void
ww_mutex_set_context_slowpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
struct mutex_waiter *cur;

/*
* Mutex spinning code migrated from kernel/sched/core.c
*/
ww_mutex_lock_acquired(lock, ctx);
lock->ctx = ctx;

/*
* Give any possible sleeping processes the chance to wake up,
* so they can recheck if they have to back off.
*/
list_for_each_entry(cur, &lock->base.wait_list, list) {
debug_mutex_wake_waiter(&lock->base, cur);
wake_up_process(cur->task);
}
}

#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
{
if (lock->owner != owner)
Expand Down Expand Up @@ -307,6 +320,11 @@ static bool mutex_optimistic_spin(struct mutex *lock,
if (!mutex_can_spin_on_owner(lock))
goto done;

/*
* In order to avoid a stampede of mutex spinners trying to
* acquire the mutex all at once, the spinners need to take a
* MCS (queued) lock first before spinning on the owner field.
*/
if (!osq_lock(&lock->osq))
goto done;

Expand Down Expand Up @@ -469,7 +487,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock)
EXPORT_SYMBOL(ww_mutex_unlock);

static inline int __sched
__mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
__ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
{
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
Expand Down Expand Up @@ -557,7 +575,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
}

if (use_ww_ctx && ww_ctx->acquired > 0) {
ret = __mutex_lock_check_stamp(lock, ww_ctx);
ret = __ww_mutex_lock_check_stamp(lock, ww_ctx);
if (ret)
goto err;
}
Expand All @@ -569,6 +587,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
schedule_preempt_disabled();
spin_lock_mutex(&lock->wait_lock, flags);
}
__set_task_state(task, TASK_RUNNING);

mutex_remove_waiter(lock, &waiter, current_thread_info());
/* set it to 0 if there are no waiters left: */
if (likely(list_empty(&lock->wait_list)))
Expand All @@ -582,23 +602,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,

if (use_ww_ctx) {
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
struct mutex_waiter *cur;

/*
* This branch gets optimized out for the common case,
* and is only important for ww_mutex_lock.
*/
ww_mutex_lock_acquired(ww, ww_ctx);
ww->ctx = ww_ctx;

/*
* Give any possible sleeping processes the chance to wake up,
* so they can recheck if they have to back off.
*/
list_for_each_entry(cur, &lock->wait_list, list) {
debug_mutex_wake_waiter(lock, cur);
wake_up_process(cur->task);
}
ww_mutex_set_context_slowpath(ww, ww_ctx);
}

spin_unlock_mutex(&lock->wait_lock, flags);
Expand Down
9 changes: 2 additions & 7 deletions kernel/locking/mcs_spinlock.c → kernel/locking/osq_lock.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include "mcs_spinlock.h"

#ifdef CONFIG_SMP
#include <linux/osq_lock.h>

/*
* An MCS like lock especially tailored for optimistic spinning for sleeping
Expand Down Expand Up @@ -111,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
* cmpxchg in an attempt to undo our queueing.
*/

while (!smp_load_acquire(&node->locked)) {
while (!ACCESS_ONCE(node->locked)) {
/*
* If we need to reschedule bail... so we can block.
*/
Expand Down Expand Up @@ -203,6 +201,3 @@ void osq_unlock(struct optimistic_spin_queue *lock)
if (next)
ACCESS_ONCE(next->locked) = 1;
}

#endif

7 changes: 3 additions & 4 deletions kernel/locking/rtmutex.c
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
set_current_state(state);
}

__set_current_state(TASK_RUNNING);
return ret;
}

Expand Down Expand Up @@ -1188,10 +1189,9 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);

if (likely(!ret))
/* sleep on the mutex */
ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);

set_current_state(TASK_RUNNING);

if (unlikely(ret)) {
remove_waiter(lock, &waiter);
rt_mutex_handle_deadlock(ret, chwalk, &waiter);
Expand Down Expand Up @@ -1626,10 +1626,9 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,

set_current_state(TASK_INTERRUPTIBLE);

/* sleep on the mutex */
ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);

set_current_state(TASK_RUNNING);

if (unlikely(ret))
remove_waiter(lock, waiter);

Expand Down
2 changes: 1 addition & 1 deletion kernel/locking/rwsem-spinlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ void __sched __down_read(struct rw_semaphore *sem)
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
}

tsk->state = TASK_RUNNING;
__set_task_state(tsk, TASK_RUNNING);
out:
;
}
Expand Down
3 changes: 1 addition & 2 deletions kernel/locking/rwsem-xadd.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
schedule();
}

tsk->state = TASK_RUNNING;

__set_task_state(tsk, TASK_RUNNING);
return sem;
}
EXPORT_SYMBOL(rwsem_down_read_failed);
Expand Down
18 changes: 10 additions & 8 deletions kernel/sched/completion.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,15 @@ bool try_wait_for_completion(struct completion *x)
unsigned long flags;
int ret = 1;

/*
* Since x->done will need to be locked only
* in the non-blocking case, we check x->done
* first without taking the lock so we can
* return early in the blocking case.
*/
if (!ACCESS_ONCE(x->done))
return 0;

spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
Expand All @@ -288,13 +297,6 @@ EXPORT_SYMBOL(try_wait_for_completion);
*/
bool completion_done(struct completion *x)
{
unsigned long flags;
int ret = 1;

spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
return !!ACCESS_ONCE(x->done);
}
EXPORT_SYMBOL(completion_done);
6 changes: 5 additions & 1 deletion kernel/softirq.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,12 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
trace_softirqs_off(ip);
raw_local_irq_restore(flags);

if (preempt_count() == cnt)
if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
#endif
trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */
Expand Down

0 comments on commit 8308756

Please sign in to comment.