@@ -94,24 +94,28 @@ static inline struct mcs_spinlock *decode_tail(u32 tail)
 	return per_cpu_ptr(&mcs_nodes[idx], cpu);
 }
 
+#define _Q_LOCKED_PENDING_MASK	(_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
 /**
  * queued_spin_lock_slowpath - acquire the queued spinlock
  * @lock: Pointer to queued spinlock structure
  * @val: Current value of the queued spinlock 32-bit word
  *
- * (queue tail, lock value)
- *
- *              fast      :    slow                                  :    unlock
- *                        :                                          :
- * uncontended  (0,0)   --:--> (0,1) --------------------------------:--> (*,0)
- *                        :       | ^--------.                    /  :
- *                        :       v           \                   |  :
- * uncontended            :    (n,x) --+--> (n,0)                 |  :
- *   queue                :       | ^--'                          |  :
- *                        :       v                               |  :
- * contended              :    (*,x) --+--> (*,0) -----> (*,1) ---'  :
- *   queue                :         ^--'                             :
+ * (queue tail, pending bit, lock value)
  *
+ *              fast     :    slow                                  :    unlock
+ *                       :                                          :
+ * uncontended  (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
+ *                       :       | ^--------.------.             /  :
+ *                       :       v           \      \            |  :
+ * pending               :    (0,1,1) +--> (0,1,0)   \           |  :
+ *                       :       | ^--'              |           |  :
+ *                       :       v                   |           |  :
+ * uncontended           :    (n,x,y) +--> (n,0,0) --'            |  :
+ *   queue               :       | ^--'                           |  :
+ *                       :       v                                |  :
+ * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'   :
+ *   queue               :         ^--'                              :
 */
 void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 {
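The new comment header describes each lock state as a (queue tail, pending bit, lock value) triple, and the added _Q_LOCKED_PENDING_MASK lets the slowpath test the two low fields in one mask operation. The userspace sketch below is illustrative only: the Q_* constants, the decode() helper and the exact bit offsets are assumptions mirroring the layout this series uses (locked byte in bits 0-7, pending bit in bit 8, encoded tail above that), not the kernel definitions themselves.

/*
 * Illustrative userspace model of the (queue tail, pending bit, lock value)
 * word -- not kernel code.  The Q_* constants are assumptions mirroring the
 * assumed layout: locked byte in bits 0-7, pending in bit 8, tail above.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define Q_LOCKED_MASK		0x000000ffu	/* bits 0-7  */
#define Q_PENDING_MASK		0x00000100u	/* bit  8    */
#define Q_TAIL_MASK		0xfffffe00u	/* bits 9-31 */
#define Q_LOCKED_PENDING_MASK	(Q_LOCKED_MASK | Q_PENDING_MASK)

static void decode(uint32_t val)
{
	printf("tail=0x%" PRIx32 " pending=%d locked=%" PRIu32 "\n",
	       val & Q_TAIL_MASK,
	       !!(val & Q_PENDING_MASK),
	       val & Q_LOCKED_MASK);
}

int main(void)
{
	decode(0x00000001);	/* (0,0,1): owned, no pending, empty queue */
	decode(0x00000101);	/* (0,1,1): owned, one waiter spinning on pending */
	decode(0x00000201);	/* (n,0,1): owned, waiters parked in the MCS queue */
	return 0;
}

Printing a few representative words this way makes the diagram's (tail, pending, locked) tuples easier to map onto concrete values.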
@@ -121,6 +125,75 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
 
+	/*
+	 * wait for in-progress pending->locked hand-overs
+	 *
+	 * 0,1,0 -> 0,0,1
+	 */
+	if (val == _Q_PENDING_VAL) {
+		while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
+			cpu_relax();
+	}
+
+	/*
+	 * trylock || pending
+	 *
+	 * 0,0,0 -> 0,0,1 ; trylock
+	 * 0,0,1 -> 0,1,1 ; pending
+	 */
+	for (;;) {
+		/*
+		 * If we observe any contention; queue.
+		 */
+		if (val & ~_Q_LOCKED_MASK)
+			goto queue;
+
+		new = _Q_LOCKED_VAL;
+		if (val == new)
+			new |= _Q_PENDING_VAL;
+
+		old = atomic_cmpxchg(&lock->val, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+
+	/*
+	 * we won the trylock
+	 */
+	if (new == _Q_LOCKED_VAL)
+		return;
+
+	/*
+	 * we're pending, wait for the owner to go away.
+	 *
+	 * *,1,1 -> *,1,0
+	 */
+	while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
+		cpu_relax();
+
+	/*
+	 * take ownership and clear the pending bit.
+	 *
+	 * *,1,0 -> *,0,1
+	 */
+	for (;;) {
+		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+
+		old = atomic_cmpxchg(&lock->val, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+	return;
+
+	/*
+	 * End of pending bit optimistic spinning and beginning of MCS
+	 * queuing.
+	 */
+queue:
 	node = this_cpu_ptr(&mcs_nodes[0]);
 	idx = node->count++;
 	tail = encode_tail(smp_processor_id(), idx);
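The hunk above is the heart of the patch: a single cmpxchg either takes a free lock (0,0,0 -> 0,0,1) or sets the pending bit behind the current owner (0,0,1 -> 0,1,1), and any other observed state sends the CPU to the MCS queue. A rough C11 userspace model of just that decision loop might look like the sketch below; the names (trylock_or_pend, enum path, the Q_* constants) are made up for illustration and the memory ordering is simplified relative to the kernel's fully ordered atomic_cmpxchg().

/*
 * Rough C11 model of the "trylock || pending" loop -- not the kernel code.
 */
#include <stdatomic.h>
#include <stdint.h>

#define Q_LOCKED_VAL	0x001u
#define Q_PENDING_VAL	0x100u
#define Q_LOCKED_MASK	0x0ffu

enum path { TOOK_LOCK, BECAME_PENDING, MUST_QUEUE };

static enum path trylock_or_pend(_Atomic uint32_t *lock)
{
	uint32_t val = atomic_load_explicit(lock, memory_order_relaxed);

	for (;;) {
		uint32_t new;

		/* tail or pending already set: fall back to the MCS queue */
		if (val & ~Q_LOCKED_MASK)
			return MUST_QUEUE;

		new = Q_LOCKED_VAL;		/* 0,0,0 -> 0,0,1 */
		if (val == new)
			new |= Q_PENDING_VAL;	/* 0,0,1 -> 0,1,1 */

		/* on failure 'val' is refreshed with the current word */
		if (atomic_compare_exchange_weak_explicit(lock, &val, new,
							  memory_order_acquire,
							  memory_order_relaxed))
			return new == Q_LOCKED_VAL ? TOOK_LOCK : BECAME_PENDING;
	}
}

A caller that gets BECAME_PENDING would then spin until the locked byte clears and take ownership while clearing the pending bit, mirroring the *,1,1 -> *,1,0 -> *,0,1 sequence in the hunk above.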
@@ -130,15 +203,18 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	node->next = NULL;
 
 	/*
+	 * We have already touched the queueing cacheline; don't bother with
+	 * pending stuff.
+	 *
 	 * trylock || xchg(lock, node)
 	 *
-	 * 0,0 -> 0,1 ; no tail, not locked -> no tail, locked.
-	 * p,x -> n,x ; tail was p -> tail is n; preserving locked.
+	 * 0,0,0 -> 0,0,1 ; no tail, not locked -> no tail, locked.
+	 * p,y,x -> n,y,x ; tail was p -> tail is n; preserving locked.
 	 */
 	for (;;) {
 		new = _Q_LOCKED_VAL;
 		if (val)
-			new = tail | (val & _Q_LOCKED_MASK);
+			new = tail | (val & _Q_LOCKED_PENDING_MASK);
 
 		old = atomic_cmpxchg(&lock->val, val, new);
 		if (old == val)
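With the pending bit now part of the word, publishing a new queue tail has to preserve both low fields, which is why the cmpxchg above switches from _Q_LOCKED_MASK to _Q_LOCKED_PENDING_MASK. The sketch below (not kernel code; the publish_tail name and the constants are the same illustrative assumptions as before, and 'tail' is assumed to be the already-encoded tail bits) shows that swap returning the old word so the caller can link behind the previous tail:

/*
 * Sketch of the tail-publish step -- not kernel code.  The plain (seq_cst)
 * compare-exchange stands in for the kernel's atomic_cmpxchg().
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define Q_LOCKED_VAL		0x001u
#define Q_LOCKED_PENDING_MASK	0x1ffu	/* locked byte + pending bit */

/* returns the previous word so the caller can find the old tail to link after */
static uint32_t publish_tail(_Atomic uint32_t *lock, uint32_t tail)
{
	uint32_t val = atomic_load_explicit(lock, memory_order_relaxed);

	for (;;) {
		/* empty word: just take the lock; else keep locked+pending */
		uint32_t new = val ? (tail | (val & Q_LOCKED_PENDING_MASK))
				   : Q_LOCKED_VAL;

		if (atomic_compare_exchange_weak(lock, &val, new))
			return val;	/* unchanged on success == old word */
	}
}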
@@ -157,26 +233,27 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 * if there was a previous node; link it and wait until reaching the
 	 * head of the waitqueue.
 	 */
-	if (old & ~_Q_LOCKED_MASK) {
+	if (old & ~_Q_LOCKED_PENDING_MASK) {
 		prev = decode_tail(old);
 		WRITE_ONCE(prev->next, node);
 
 		arch_mcs_spin_lock_contended(&node->locked);
 	}
 
 	/*
-	 * we're at the head of the waitqueue, wait for the owner to go away.
+	 * we're at the head of the waitqueue, wait for the owner & pending to
+	 * go away.
 	 *
-	 * *,x -> *,0
+	 * *,x,y -> *,0,0
 	 */
-	while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
+	while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)
 		cpu_relax();
 
 	/*
 	 * claim the lock:
 	 *
-	 * n,0 -> 0,1 : lock, uncontended
-	 * *,0 -> *,1 : lock, contended
+	 * n,0,0 -> 0,0,1 : lock, uncontended
+	 * *,0,0 -> *,0,1 : lock, contended
 	 */
 	for (;;) {
 		new = _Q_LOCKED_VAL;
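This last hunk makes the MCS queue head wait for the pending bit as well as the lock byte, since a pending CPU acquires the lock ahead of the queue. A minimal sketch of that wait, again using the illustrative mask rather than the kernel's definitions:

/*
 * Sketch of the queue-head wait -- not kernel code.  The head spins until
 * both the locked byte and the pending bit drain before claiming the lock.
 */
#include <stdatomic.h>
#include <stdint.h>

#define Q_LOCKED_PENDING_MASK	0x1ffu	/* locked byte + pending bit */

static uint32_t wait_for_head_turn(_Atomic uint32_t *lock)
{
	uint32_t val;

	/* spin until only tail bits remain; the kernel adds cpu_relax() here */
	while ((val = atomic_load_explicit(lock, memory_order_acquire)) &
	       Q_LOCKED_PENDING_MASK)
		;

	return val;
}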