Skip to content

Commit

Permalink
[scudo] Fine tune busy-waiting in HybridMutex
Browse files Browse the repository at this point in the history
Instead of using a hardware-specific instruction, a simple loop over a
volatile variable gives a similar and more predictable waiting time. Also
fine-tune the waiting time to fit the average time spent in malloc/free
operations.

Reviewed By: cferris

Differential Revision: https://reviews.llvm.org/D156951
  • Loading branch information
ChiaHungDuan committed Sep 21, 2023
1 parent 846eb76 commit cde307e
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 18 deletions.
15 changes: 0 additions & 15 deletions compiler-rt/lib/scudo/standalone/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,21 +112,6 @@ template <typename T> inline void shuffle(T *A, u32 N, u32 *RandState) {
*RandState = State;
}

// Hardware specific inlinable functions.

// Issues the architecture's spin-wait hint instruction `Count` times: "pause"
// on x86/x86-64, "yield" on ARM/AArch64. On any other architecture no hint is
// issued and `Count` is unused. The empty asm statements with a "memory"
// clobber act as compiler barriers around the hint sequence.
inline void yieldProcessor(UNUSED u8 Count) {
#if defined(__i386__) || defined(__x86_64__)
  __asm__ __volatile__("" ::: "memory");
  for (u8 Remaining = Count; Remaining != 0; --Remaining)
    __asm__ __volatile__("pause");
#elif defined(__aarch64__) || defined(__arm__)
  __asm__ __volatile__("" ::: "memory");
  for (u8 Remaining = Count; Remaining != 0; --Remaining)
    __asm__ __volatile__("yield");
#endif
  // Trailing barrier is emitted on every architecture, hinted or not.
  __asm__ __volatile__("" ::: "memory");
}

// Platform specific functions.

extern uptr PageSizeCached;
Expand Down
17 changes: 14 additions & 3 deletions compiler-rt/lib/scudo/standalone/mutex.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class CAPABILITY("mutex") HybridMutex {
#pragma nounroll
#endif
for (u8 I = 0U; I < NumberOfTries; I++) {
yieldProcessor(NumberOfYields);
delayLoop();
if (tryLock())
return;
}
Expand All @@ -53,10 +53,21 @@ class CAPABILITY("mutex") HybridMutex {
}

private:
void delayLoop() {
// The value comes from the average time spent in accessing caches (which
// are the fastest operations) so that we are unlikely to wait too long for
// fast operations.
constexpr u32 SpinTimes = 16;
volatile u32 V = 0;
for (u32 I = 0; I < SpinTimes; ++I)
++V;
}

void assertHeldImpl();

static constexpr u8 NumberOfTries = 8U;
static constexpr u8 NumberOfYields = 8U;
// Upper bound on the number of spin iterations in the lock loop; each
// iteration calls delayLoop() and then tryLock().
// TODO(chiahungduan): Adapt this value based on scenarios. E.g., primary and
// secondary allocator have different allocation times.
static constexpr u8 NumberOfTries = 32U;

#if SCUDO_LINUX
atomic_u32 M = {};
Expand Down

0 comments on commit cde307e

Please sign in to comment.