Skip to content

Commit c1fb159

Browse files
Peter Zijlstra (Intel), Ingo Molnar
Peter Zijlstra (Intel)
authored and
Ingo Molnar
committed
locking/qspinlock: Add pending bit
Because the qspinlock needs to touch a second cacheline (the per-cpu mcs_nodes[]); add a pending bit and allow a single in-word spinner before we punt to the second cacheline. It is possible to observe the pending bit without the locked bit when the last owner has just released but the pending owner has not yet taken ownership. In this case we would normally queue -- because the pending bit is already taken. However, in this case the pending bit is guaranteed to be released 'soon', therefore wait for it and avoid queueing. Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Signed-off-by: Waiman Long <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Cc: Andrew Morton <[email protected]> Cc: Boris Ostrovsky <[email protected]> Cc: Borislav Petkov <[email protected]> Cc: Daniel J Blueman <[email protected]> Cc: David Vrabel <[email protected]> Cc: Douglas Hatch <[email protected]> Cc: H. Peter Anvin <[email protected]> Cc: Konrad Rzeszutek Wilk <[email protected]> Cc: Linus Torvalds <[email protected]> Cc: Oleg Nesterov <[email protected]> Cc: Paolo Bonzini <[email protected]> Cc: Paul E. McKenney <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Raghavendra K T <[email protected]> Cc: Rik van Riel <[email protected]> Cc: Scott J Norton <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: [email protected] Cc: [email protected] Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Ingo Molnar <[email protected]>
1 parent d73a339 commit c1fb159

File tree

2 files changed

+107
-24
lines changed

2 files changed

+107
-24
lines changed

include/asm-generic/qspinlock_types.h

+9-3
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,21 @@ typedef struct qspinlock {
3636
* Bitfields in the atomic value:
3737
*
3838
* 0- 7: locked byte
39-
* 8- 9: tail index
40-
* 10-31: tail cpu (+1)
39+
* 8: pending
40+
* 9-10: tail index
41+
* 11-31: tail cpu (+1)
4142
*/
4243
#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\
4344
<< _Q_ ## type ## _OFFSET)
4445
#define _Q_LOCKED_OFFSET 0
4546
#define _Q_LOCKED_BITS 8
4647
#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED)
4748

48-
#define _Q_TAIL_IDX_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
49+
#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
50+
#define _Q_PENDING_BITS 1
51+
#define _Q_PENDING_MASK _Q_SET_MASK(PENDING)
52+
53+
#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS)
4954
#define _Q_TAIL_IDX_BITS 2
5055
#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX)
5156

@@ -54,5 +59,6 @@ typedef struct qspinlock {
5459
#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU)
5560

5661
#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
62+
#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET)
5763

5864
#endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */

kernel/locking/qspinlock.c

+98-21
Original file line numberDiff line numberDiff line change
@@ -94,24 +94,28 @@ static inline struct mcs_spinlock *decode_tail(u32 tail)
9494
return per_cpu_ptr(&mcs_nodes[idx], cpu);
9595
}
9696

97+
#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
98+
9799
/**
98100
* queued_spin_lock_slowpath - acquire the queued spinlock
99101
* @lock: Pointer to queued spinlock structure
100102
* @val: Current value of the queued spinlock 32-bit word
101103
*
102-
* (queue tail, lock value)
103-
*
104-
* fast : slow : unlock
105-
* : :
106-
* uncontended (0,0) --:--> (0,1) --------------------------------:--> (*,0)
107-
* : | ^--------. / :
108-
* : v \ | :
109-
* uncontended : (n,x) --+--> (n,0) | :
110-
* queue : | ^--' | :
111-
* : v | :
112-
* contended : (*,x) --+--> (*,0) -----> (*,1) ---' :
113-
* queue : ^--' :
104+
* (queue tail, pending bit, lock value)
114105
*
106+
* fast : slow : unlock
107+
* : :
108+
* uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
109+
* : | ^--------.------. / :
110+
* : v \ \ | :
111+
* pending : (0,1,1) +--> (0,1,0) \ | :
112+
* : | ^--' | | :
113+
* : v | | :
114+
* uncontended : (n,x,y) +--> (n,0,0) --' | :
115+
* queue : | ^--' | :
116+
* : v | :
117+
* contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' :
118+
* queue : ^--' :
115119
*/
116120
void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
117121
{
@@ -121,6 +125,75 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
121125

122126
BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
123127

128+
/*
129+
* wait for in-progress pending->locked hand-overs
130+
*
131+
* 0,1,0 -> 0,0,1
132+
*/
133+
if (val == _Q_PENDING_VAL) {
134+
while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
135+
cpu_relax();
136+
}
137+
138+
/*
139+
* trylock || pending
140+
*
141+
* 0,0,0 -> 0,0,1 ; trylock
142+
* 0,0,1 -> 0,1,1 ; pending
143+
*/
144+
for (;;) {
145+
/*
146+
* If we observe any contention; queue.
147+
*/
148+
if (val & ~_Q_LOCKED_MASK)
149+
goto queue;
150+
151+
new = _Q_LOCKED_VAL;
152+
if (val == new)
153+
new |= _Q_PENDING_VAL;
154+
155+
old = atomic_cmpxchg(&lock->val, val, new);
156+
if (old == val)
157+
break;
158+
159+
val = old;
160+
}
161+
162+
/*
163+
* we won the trylock
164+
*/
165+
if (new == _Q_LOCKED_VAL)
166+
return;
167+
168+
/*
169+
* we're pending, wait for the owner to go away.
170+
*
171+
* *,1,1 -> *,1,0
172+
*/
173+
while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
174+
cpu_relax();
175+
176+
/*
177+
* take ownership and clear the pending bit.
178+
*
179+
* *,1,0 -> *,0,1
180+
*/
181+
for (;;) {
182+
new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
183+
184+
old = atomic_cmpxchg(&lock->val, val, new);
185+
if (old == val)
186+
break;
187+
188+
val = old;
189+
}
190+
return;
191+
192+
/*
193+
* End of pending bit optimistic spinning and beginning of MCS
194+
* queuing.
195+
*/
196+
queue:
124197
node = this_cpu_ptr(&mcs_nodes[0]);
125198
idx = node->count++;
126199
tail = encode_tail(smp_processor_id(), idx);
@@ -130,15 +203,18 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
130203
node->next = NULL;
131204

132205
/*
206+
* We have already touched the queueing cacheline; don't bother with
207+
* pending stuff.
208+
*
133209
* trylock || xchg(lock, node)
134210
*
135-
* 0,0 -> 0,1 ; no tail, not locked -> no tail, locked.
136-
* p,x -> n,x ; tail was p -> tail is n; preserving locked.
211+
* 0,0,0 -> 0,0,1 ; no tail, not locked -> no tail, locked.
212+
* p,y,x -> n,y,x ; tail was p -> tail is n; preserving locked.
137213
*/
138214
for (;;) {
139215
new = _Q_LOCKED_VAL;
140216
if (val)
141-
new = tail | (val & _Q_LOCKED_MASK);
217+
new = tail | (val & _Q_LOCKED_PENDING_MASK);
142218

143219
old = atomic_cmpxchg(&lock->val, val, new);
144220
if (old == val)
@@ -157,26 +233,27 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
157233
* if there was a previous node; link it and wait until reaching the
158234
* head of the waitqueue.
159235
*/
160-
if (old & ~_Q_LOCKED_MASK) {
236+
if (old & ~_Q_LOCKED_PENDING_MASK) {
161237
prev = decode_tail(old);
162238
WRITE_ONCE(prev->next, node);
163239

164240
arch_mcs_spin_lock_contended(&node->locked);
165241
}
166242

167243
/*
168-
* we're at the head of the waitqueue, wait for the owner to go away.
244+
* we're at the head of the waitqueue, wait for the owner & pending to
245+
* go away.
169246
*
170-
* *,x -> *,0
247+
* *,x,y -> *,0,0
171248
*/
172-
while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
249+
while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)
173250
cpu_relax();
174251

175252
/*
176253
* claim the lock:
177254
*
178-
* n,0 -> 0,1 : lock, uncontended
179-
* *,0 -> *,1 : lock, contended
255+
* n,0,0 -> 0,0,1 : lock, uncontended
256+
* *,0,0 -> *,0,1 : lock, contended
180257
*/
181258
for (;;) {
182259
new = _Q_LOCKED_VAL;

0 commit comments

Comments
 (0)