Skip to content

Commit d7c6ca0

Browse files
committedJan 28, 2015
More attempts at optimizing, and fixed bug introduced in previous commit
1 parent ccb53c6 commit d7c6ca0

File tree

5 files changed

+54
-42
lines changed

5 files changed

+54
-42
lines changed
 

‎.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,4 @@ benchmarks/msvc10/obj/
2626
benchmarks/msvc12/Debug/
2727
benchmarks/msvc12/Release/
2828
benchmarks/msvc12/obj/
29-
29+
test/

‎benchmarks/bench.cpp

+10-10
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// ©2013-2014 Cameron Desrochers.
1+
// ©2013-2015 Cameron Desrochers.
22
// Distributed under the simplified BSD license (see the LICENSE file that
33
// should have come with this file).
44

@@ -229,7 +229,7 @@ double runBenchmark(BenchmarkType benchmark, unsigned int randomSeed, double& ou
229229
forceNoOptimizeDummy = total;
230230
} break;
231231
case bench_empty_remove: {
232-
const counter_t MAX = 400 * 1000;
232+
const counter_t MAX = 2000 * 1000;
233233
out_Ops = MAX;
234234
TQueue q(MAX);
235235
int total = 0;
@@ -257,7 +257,7 @@ double runBenchmark(BenchmarkType benchmark, unsigned int randomSeed, double& ou
257257
forceNoOptimizeDummy = total;
258258
} break;
259259
case bench_single_threaded: {
260-
const counter_t MAX = 80 * 1000;
260+
const counter_t MAX = 200 * 1000;
261261
out_Ops = MAX;
262262
RNG_t rng(randomSeed);
263263
std::uniform_int_distribution<int> rand(0, 1);
@@ -278,16 +278,16 @@ double runBenchmark(BenchmarkType benchmark, unsigned int randomSeed, double& ou
278278
forceNoOptimizeDummy = (int)(q.try_dequeue(element));
279279
} break;
280280
case bench_mostly_add: {
281-
const counter_t MAX = 120 * 1000;
281+
const counter_t MAX = 1200 * 1000;
282282
out_Ops = MAX;
283283
int readOps = 0;
284284
RNG_t rng(randomSeed);
285-
std::uniform_int_distribution<int> rand(0, 7);
285+
std::uniform_int_distribution<int> rand(0, 3);
286286
TQueue q(MAX);
287287
int element = -1;
288288
start = getSystemTime();
289289
SimpleThread consumer([&]() {
290-
for (counter_t i = 0; i != MAX; ++i) {
290+
for (counter_t i = 0; i != MAX / 10; ++i) {
291291
if (rand(rng) == 0) {
292292
q.try_dequeue(element);
293293
++readOps;
@@ -308,11 +308,11 @@ double runBenchmark(BenchmarkType benchmark, unsigned int randomSeed, double& ou
308308
out_Ops += readOps;
309309
} break;
310310
case bench_mostly_remove: {
311-
const counter_t MAX = 120 * 1000;
311+
const counter_t MAX = 1200 * 1000;
312312
out_Ops = MAX;
313313
int writeOps = 0;
314314
RNG_t rng(randomSeed);
315-
std::uniform_int_distribution<int> rand(0, 7);
315+
std::uniform_int_distribution<int> rand(0, 3);
316316
TQueue q(MAX);
317317
int element = -1;
318318
start = getSystemTime();
@@ -323,7 +323,7 @@ double runBenchmark(BenchmarkType benchmark, unsigned int randomSeed, double& ou
323323
});
324324
SimpleThread producer([&]() {
325325
int num = 0;
326-
for (counter_t i = 0; i != MAX; ++i) {
326+
for (counter_t i = 0; i != MAX / 10; ++i) {
327327
if (rand(rng) == 0) {
328328
q.enqueue(num);
329329
++num;
@@ -338,7 +338,7 @@ double runBenchmark(BenchmarkType benchmark, unsigned int randomSeed, double& ou
338338
out_Ops += writeOps;
339339
} break;
340340
case bench_heavy_concurrent: {
341-
const counter_t MAX = 300 * 1000;
341+
const counter_t MAX = 1000 * 1000;
342342
out_Ops = MAX * 2;
343343
TQueue q(MAX);
344344
int element = -1;

‎benchmarks/makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ endif
1111
default: benchmarks$(EXT)
1212

1313
benchmarks$(EXT): bench.cpp ../readerwriterqueue.h ../atomicops.h ext/1024cores/spscqueue.h ext/folly/ProducerConsumerQueue.h ../tests/common/simplethread.h ../tests/common/simplethread.cpp systemtime.h systemtime.cpp makefile
14-
g++ -std=c++11 -Wpedantic -Wall -DNDEBUG -O3 $(PLATFORM_OPTS) bench.cpp ../tests/common/simplethread.cpp systemtime.cpp -o benchmarks$(EXT) -pthread
14+
g++ -std=c++11 -Wpedantic -Wall -DNDEBUG -O3 -g $(PLATFORM_OPTS) bench.cpp ../tests/common/simplethread.cpp systemtime.cpp -o benchmarks$(EXT) -pthread
1515

1616
run: benchmarks$(EXT)
1717
./benchmarks$(EXT)

‎readerwriterqueue.h

+23-29
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// ©2013 Cameron Desrochers.
1+
// ©2013-2015 Cameron Desrochers.
22
// Distributed under the simplified BSD license (see the license file that
33
// should have come with this header).
44

@@ -75,6 +75,8 @@ class ReaderWriterQueue
7575
assert(MAX_BLOCK_SIZE == ceilToPow2(MAX_BLOCK_SIZE) && "MAX_BLOCK_SIZE must be a power of 2");
7676
assert(MAX_BLOCK_SIZE >= 2 && "MAX_BLOCK_SIZE must be at least 2");
7777

78+
Block* firstBlock = nullptr;
79+
7880
largestBlockSize = ceilToPow2(maxSize + 1); // We need a spare slot to fit maxSize elements in the block
7981
if (largestBlockSize > MAX_BLOCK_SIZE * 2) {
8082
// We need a spare block in case the producer is writing to a different block the consumer is reading from, and
@@ -84,7 +86,6 @@ class ReaderWriterQueue
8486
// number of blocks - 1. Solving for maxSize and applying a ceiling to the division gives us (after simplifying):
8587
size_t initialBlockCount = (maxSize + MAX_BLOCK_SIZE * 2 - 3) / (MAX_BLOCK_SIZE - 1);
8688
largestBlockSize = MAX_BLOCK_SIZE;
87-
Block* firstBlock = nullptr;
8889
Block* lastBlock = nullptr;
8990
for (size_t i = 0; i != initialBlockCount; ++i) {
9091
auto block = make_block(largestBlockSize);
@@ -100,18 +101,16 @@ class ReaderWriterQueue
100101
lastBlock = block;
101102
block->next = firstBlock;
102103
}
103-
frontBlock = firstBlock;
104-
tailBlock = lastBlock;
105104
}
106105
else {
107-
auto firstBlock = make_block(largestBlockSize);
106+
firstBlock = make_block(largestBlockSize);
108107
if (firstBlock == nullptr) {
109108
throw std::bad_alloc();
110109
}
111110
firstBlock->next = firstBlock;
112-
frontBlock = firstBlock;
113-
tailBlock = firstBlock;
114111
}
112+
frontBlock = firstBlock;
113+
tailBlock = firstBlock;
115114

116115
// Make sure the reader/writer threads will have the initialized memory setup above:
117116
fence(memory_order_sync);
@@ -132,7 +131,7 @@ class ReaderWriterQueue
132131
size_t blockFront = block->front;
133132
size_t blockTail = block->tail;
134133

135-
for (size_t i = blockFront; i != blockTail; i = (i + 1) & block->sizeMask()) {
134+
for (size_t i = blockFront; i != blockTail; i = (i + 1) & block->sizeMask) {
136135
auto element = reinterpret_cast<T*>(block->data + i * sizeof(T));
137136
element->~T();
138137
(void)element;
@@ -208,11 +207,10 @@ class ReaderWriterQueue
208207
// block has advanced.
209208

210209
Block* frontBlock_ = frontBlock.load();
211-
Block* tailBlock_;
212210
size_t blockTail = frontBlock_->localTail;
213211
size_t blockFront = frontBlock_->front.load();
214212

215-
if (blockFront != blockTail || blockFront != (blockTail = frontBlock_->localTail = frontBlock_->tail.load())) {
213+
if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
216214
fence(memory_order_acquire);
217215

218216
non_empty_front_block:
@@ -221,12 +219,12 @@ class ReaderWriterQueue
221219
result = std::move(*element);
222220
element->~T();
223221

224-
blockFront = (blockFront + 1) & frontBlock_->sizeMask();
222+
blockFront = (blockFront + 1) & frontBlock_->sizeMask;
225223

226224
fence(memory_order_release);
227225
frontBlock_->front = blockFront;
228226
}
229-
else if (frontBlock_ != (tailBlock_ = tailBlock.load())) {
227+
else if (frontBlock_ != tailBlock.load()) {
230228
fence(memory_order_acquire);
231229

232230
frontBlock_ = frontBlock.load();
@@ -265,7 +263,7 @@ class ReaderWriterQueue
265263
result = std::move(*element);
266264
element->~T();
267265

268-
nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask();
266+
nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
269267

270268
fence(memory_order_release);
271269
frontBlock_->front = nextBlockFront;
@@ -292,16 +290,15 @@ class ReaderWriterQueue
292290
// See try_dequeue() for reasoning
293291

294292
Block* frontBlock_ = frontBlock.load();
295-
Block* tailBlock_;
296293
size_t blockTail = frontBlock_->localTail;
297294
size_t blockFront = frontBlock_->front.load();
298295

299-
if (blockFront != blockTail || blockFront != (blockTail = frontBlock_->localTail = frontBlock_->tail.load())) {
296+
if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
300297
fence(memory_order_acquire);
301298
non_empty_front_block:
302299
return reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
303300
}
304-
else if (frontBlock_ != (tailBlock_ = tailBlock.load())) {
301+
else if (frontBlock_ != tailBlock.load()) {
305302
fence(memory_order_acquire);
306303
frontBlock_ = frontBlock.load();
307304
blockTail = frontBlock_->localTail = frontBlock_->tail.load();
@@ -335,23 +332,22 @@ class ReaderWriterQueue
335332
// See try_dequeue() for reasoning
336333

337334
Block* frontBlock_ = frontBlock.load();
338-
Block* tailBlock_;
339335
size_t blockTail = frontBlock_->localTail;
340336
size_t blockFront = frontBlock_->front.load();
341337

342-
if (blockFront != blockTail || blockFront != (blockTail = frontBlock_->localTail = frontBlock_->tail.load())) {
338+
if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
343339
fence(memory_order_acquire);
344340

345341
non_empty_front_block:
346342
auto element = reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
347343
element->~T();
348344

349-
blockFront = (blockFront + 1) & frontBlock_->sizeMask();
345+
blockFront = (blockFront + 1) & frontBlock_->sizeMask;
350346

351347
fence(memory_order_release);
352348
frontBlock_->front = blockFront;
353349
}
354-
else if (frontBlock_ != (tailBlock_ = tailBlock.load())) {
350+
else if (frontBlock_ != tailBlock.load()) {
355351
fence(memory_order_acquire);
356352
frontBlock_ = frontBlock.load();
357353
blockTail = frontBlock_->localTail = frontBlock_->tail.load();
@@ -380,7 +376,7 @@ class ReaderWriterQueue
380376
auto element = reinterpret_cast<T*>(frontBlock_->data + nextBlockFront * sizeof(T));
381377
element->~T();
382378

383-
nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask();
379+
nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
384380

385381
fence(memory_order_release);
386382
frontBlock_->front = nextBlockFront;
@@ -404,7 +400,7 @@ class ReaderWriterQueue
404400
fence(memory_order_acquire);
405401
size_t blockFront = block->front.load();
406402
size_t blockTail = block->tail.load();
407-
result += (blockTail - blockFront) & block->sizeMask();
403+
result += (blockTail - blockFront) & block->sizeMask;
408404
block = block->next.load();
409405
} while (block != frontBlock_);
410406
return result;
@@ -432,8 +428,8 @@ class ReaderWriterQueue
432428
size_t blockFront = tailBlock_->localFront;
433429
size_t blockTail = tailBlock_->tail.load();
434430

435-
size_t nextBlockTail = (blockTail + 1) & tailBlock_->sizeMask();
436-
if (nextBlockTail != blockFront || nextBlockTail != (blockFront = tailBlock_->localFront = tailBlock_->front.load())) {
431+
size_t nextBlockTail = (blockTail + 1) & tailBlock_->sizeMask;
432+
if (nextBlockTail != blockFront || nextBlockTail != (tailBlock_->localFront = tailBlock_->front.load())) {
437433
fence(memory_order_acquire);
438434
// This block has room for at least one more element
439435
char* location = tailBlock_->data + blockTail * sizeof(T);
@@ -466,7 +462,7 @@ class ReaderWriterQueue
466462
char* location = tailBlockNext->data + nextBlockTail * sizeof(T);
467463
new (location) T(std::forward<U>(element));
468464

469-
tailBlockNext->tail = (nextBlockTail + 1) & tailBlockNext->sizeMask();
465+
tailBlockNext->tail = (nextBlockTail + 1) & tailBlockNext->sizeMask;
470466

471467
fence(memory_order_release);
472468
tailBlock = tailBlockNext;
@@ -580,14 +576,12 @@ class ReaderWriterQueue
580576

581577
char* data; // Contents (on heap) are aligned to T's alignment
582578

583-
const size_t size;
584-
585-
AE_FORCEINLINE size_t sizeMask() const { return size - 1; }
579+
const size_t sizeMask;
586580

587581

588582
// size must be a power of two (and greater than 0)
589583
Block(size_t const& _size, char* rawThis, char* _data)
590-
: front(0), localTail(0), tail(0), localFront(0), next(nullptr), data(_data), size(_size), rawThis(rawThis)
584+
: front(0), localTail(0), tail(0), localFront(0), next(nullptr), data(_data), sizeMask(_size - 1), rawThis(rawThis)
591585
{
592586
}
593587

‎tests/unittests/unittests.cpp

+19-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// ©2013 Cameron Desrochers
1+
// ©2013-2015 Cameron Desrochers
22
// Unit tests for moodycamel::ReaderWriterQueue
33

44
#include <cstdio>
@@ -53,6 +53,7 @@ class ReaderWriterQueueTests : public TestClass<ReaderWriterQueueTests>
5353
REGISTER_TEST(enqueue_many);
5454
REGISTER_TEST(nonempty_destroy);
5555
REGISTER_TEST(try_enqueue);
56+
REGISTER_TEST(try_dequeue);
5657
REGISTER_TEST(peek);
5758
REGISTER_TEST(pop);
5859
REGISTER_TEST(size_approx);
@@ -241,6 +242,23 @@ class ReaderWriterQueueTests : public TestClass<ReaderWriterQueueTests>
241242
return true;
242243
}
243244

245+
bool try_dequeue()
246+
{
247+
int item;
248+
249+
{
250+
ReaderWriterQueue<int> q(1);
251+
ASSERT_OR_FAIL(!q.try_dequeue(item));
252+
}
253+
254+
{
255+
ReaderWriterQueue<int, 2> q(10);
256+
ASSERT_OR_FAIL(!q.try_dequeue(item));
257+
}
258+
259+
return true;
260+
}
261+
244262
bool threaded()
245263
{
246264
weak_atomic<int> result;

0 commit comments

Comments
 (0)
Please sign in to comment.