Skip to content

Commit

Permalink
Remove tcmalloc_spinlock_contention metric
Browse files Browse the repository at this point in the history
Spinlock contention was removed from tcmalloc upstream, so this metric
has been set to 0 since we upgraded to tcmalloc 2.6. I looked briefly at
reverting its removal upstream, but the revert would be fairly large, so
I think it's best to just accept the loss of this instrumentation.

Change-Id: I2c29b81a2c692d5d876d2dd5381ba2c295684324
Reviewed-on: http://gerrit.cloudera.org:8080/9595
Tested-by: Todd Lipcon <[email protected]>
Reviewed-by: Adar Dembo <[email protected]>
  • Loading branch information
toddlipcon committed Mar 13, 2018
1 parent 13155d4 commit 4889a91
Show file tree
Hide file tree
Showing 7 changed files with 7 additions and 103 deletions.
2 changes: 2 additions & 0 deletions docs/release_notes.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
[[rn_1.7.0_obsoletions]]
== Obsoletions

* The `tcmalloc_contention_time` metric, which previously tracked the amount
of time spent in memory allocator lock contention, has been removed.

[[rn_1.7.0_deprecations]]
== Deprecations
Expand Down
1 change: 0 additions & 1 deletion src/kudu/scripts/parse_metrics_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@
# ("server.voluntary_context_switches", "vol_cs"),
# ("tablet.rows_inserted", "inserts_per_sec"),
# ("tablet.rows_upserted", "upserts_per_sec"),
# ("server.tcmalloc_contention_time", "tcmalloc_contention_time")
]

# These metrics will be extracted as percentile metrics into the TSV.
Expand Down
10 changes: 0 additions & 10 deletions src/kudu/util/spinlock_profiling-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,6 @@ namespace gutil {
extern void SubmitSpinLockProfileData(const void *, int64);
} // namespace gutil

namespace base {
extern void SubmitSpinLockProfileData(const void *, int64);
} // namespace base

namespace kudu {

class SpinLockProfilingTest : public KuduTest {};
Expand Down Expand Up @@ -82,10 +78,4 @@ TEST_F(SpinLockProfilingTest, TestStackCollection) {
ASSERT_EQ(0, dropped);
}

TEST_F(SpinLockProfilingTest, TestTcmallocContention) {
StartSynchronizationProfiling();
base::SubmitSpinLockProfileData(nullptr, 12345);
ASSERT_GE(GetTcmallocContentionMicros(), 0);
}

} // namespace kudu
47 changes: 0 additions & 47 deletions src/kudu/util/spinlock_profiling.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
#include "kudu/util/metrics.h"
#include "kudu/util/striped64.h"
#include "kudu/util/trace.h"
#include "kudu/util/trace_metrics.h"

DEFINE_int32(lock_contention_trace_threshold_cycles,
2000000, // 2M cycles should be about 1ms
Expand All @@ -53,12 +52,6 @@ METRIC_DEFINE_gauge_uint64(server, spinlock_contention_time,
"started. If this increases rapidly, it may indicate a performance issue in Kudu "
"internals triggered by a particular workload and warrant investigation.",
kudu::EXPOSE_AS_COUNTER);
METRIC_DEFINE_gauge_uint64(server, tcmalloc_contention_time,
"TCMalloc Contention Time", kudu::MetricUnit::kMicroseconds,
"Amount of time consumed by contention on tcmalloc's locks since the server "
"started. If this increases rapidly, it may indicate a performance issue in Kudu "
"internals triggered by a particular workload and warrant investigation.",
kudu::EXPOSE_AS_COUNTER);


using base::SpinLock;
Expand All @@ -72,18 +65,6 @@ static LongAdder* g_contended_cycles = nullptr;

namespace {

// We can't use LongAdder for tcmalloc contention, because its
// implementation can allocate memory, and doing so is prohibited
// in the tcmalloc contention callback.
//
// We pad and align this struct to two cachelines to avoid any false
// sharing with the g_contended_cycles counter or any other globals.
struct PaddedAtomic64 {
Atomic64 val;
char padding[CACHELINE_SIZE * 2 - sizeof(Atomic64)];
} CACHELINE_ALIGNED;
static PaddedAtomic64 g_tcmalloc_contention;

// Implements a very simple linear-probing hashtable of stack traces with
// a fixed number of entries.
//
Expand Down Expand Up @@ -291,10 +272,6 @@ void RegisterSpinLockContentionMetrics(const scoped_refptr<MetricEntity>& entity
entity->NeverRetire(
METRIC_spinlock_contention_time.InstantiateFunctionGauge(
entity, Bind(&GetSpinLockContentionMicros)));
entity->NeverRetire(
METRIC_tcmalloc_contention_time.InstantiateFunctionGauge(
entity, Bind(&GetTcmallocContentionMicros)));

}

uint64_t GetSpinLockContentionMicros() {
Expand All @@ -304,13 +281,6 @@ uint64_t GetSpinLockContentionMicros() {
return implicit_cast<int64_t>(micros);
}

uint64_t GetTcmallocContentionMicros() {
int64_t wait_cycles = base::subtle::NoBarrier_Load(&g_tcmalloc_contention.val);
double micros = static_cast<double>(wait_cycles) / base::CyclesPerSecond()
* kMicrosPerSecond;
return implicit_cast<int64_t>(micros);
}

void StartSynchronizationProfiling() {
InitSpinLockContentionProfiling();
base::subtle::Barrier_AtomicIncrement(&g_profiling_enabled, 1);
Expand All @@ -335,20 +305,3 @@ void SubmitSpinLockProfileData(const void *contendedlock, int64_t wait_cycles) {
kudu::SubmitSpinLockProfileData(contendedlock, wait_cycles);
}
} // namespace gutil

// tcmalloc's internal spinlocks also support submitting contention metrics
// using the base::SubmitSpinLockProfileData weak symbol. However, this function might be
// called while tcmalloc's heap lock is held. Thus, we cannot allocate memory here or else
// we risk a deadlock. So, this implementation just does the bare minimum to expose
// tcmalloc contention.
namespace base {
void SubmitSpinLockProfileData(const void* /* contendedlock */, int64_t wait_cycles) {
#if !defined(__APPLE__)
auto t = kudu::Trace::CurrentTrace();
if (t) {
t->metrics()->IncrementTcmallocContentionCycles(wait_cycles);
}
#endif // !defined(__APPLE__)
base::subtle::NoBarrier_AtomicIncrement(&kudu::g_tcmalloc_contention.val, wait_cycles);
}
} // namespace base
4 changes: 0 additions & 4 deletions src/kudu/util/spinlock_profiling.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,6 @@ void InitSpinLockContentionProfiling();
// since the server started.
uint64_t GetSpinLockContentionMicros();

// Return the total number of microseconds spent in tcmalloc contention
// since the server started.
uint64_t GetTcmallocContentionMicros();

// Register metrics in the given server entity which measure the amount of
// spinlock contention.
void RegisterSpinLockContentionMetrics(const scoped_refptr<MetricEntity>& entity);
Expand Down
26 changes: 3 additions & 23 deletions src/kudu/util/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,30 +256,10 @@ class ScopedAdoptTrace {
}

~ScopedAdoptTrace() {
auto t = Trace::threadlocal_trace_;
Trace::threadlocal_trace_ = old_trace_;

// It's critical that we Release() the reference count on 't' only
// after we've unset the thread-local variable. Otherwise, we can hit
// a nasty interaction with tcmalloc contention profiling. Consider
// the following sequence:
//
// 1. threadlocal_trace_ has refcount = 1
// 2. we call threadlocal_trace_->Release() which decrements refcount to 0
// 3. this calls 'delete' on the Trace object
// 3a. this calls tcmalloc free() on the Trace and various sub-objects
// 3b. the free() calls may end up experiencing contention in tcmalloc
// 3c. we try to account the contention in threadlocal_trace_'s TraceMetrics,
// but it has already been freed.
//
// In the best case, we just scribble into some free tcmalloc memory. In the
// worst case, tcmalloc would have already re-used this memory for a new
// allocation on another thread, and we end up overwriting someone else's memory.
//
// Waiting to Release() only after 'unpublishing' the trace solves this.
if (t) {
t->Release();
if (Trace::threadlocal_trace_) {
Trace::threadlocal_trace_->Release();
}
Trace::threadlocal_trace_ = old_trace_;
DFAKE_SCOPED_LOCK_THREAD_LOCKED(ctor_dtor_);
}

Expand Down
20 changes: 2 additions & 18 deletions src/kudu/util/trace_metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ namespace kudu {
// are plausible.
class TraceMetrics {
public:
TraceMetrics() : tcmalloc_contention_cycles_(0) {}
TraceMetrics() {}
~TraceMetrics() {}

// Internalize the given string by duplicating it into a process-wide
Expand All @@ -59,14 +59,6 @@ class TraceMetrics {
// Return a copy of the current counter map.
std::map<const char*, int64_t> Get() const;

// Increment the number of cycles spent in tcmalloc lock contention.
//
// tcmalloc contention is stored separately from other metrics since
// it is incremented from a code path that prohibits allocation.
void IncrementTcmallocContentionCycles(int64_t cycles) {
tcmalloc_contention_cycles_.IncrementBy(cycles);
}

// Return metric's current value.
//
// NOTE: the 'name' MUST be the same const char* which is used for
Expand All @@ -76,7 +68,6 @@ class TraceMetrics {
private:
mutable simple_spinlock lock_;
std::map<const char*, int64_t> counters_;
AtomicInt<int64_t> tcmalloc_contention_cycles_;

DISALLOW_COPY_AND_ASSIGN(TraceMetrics);
};
Expand All @@ -88,14 +79,7 @@ inline void TraceMetrics::Increment(const char* name, int64_t amount) {

inline std::map<const char*, int64_t> TraceMetrics::Get() const {
std::unique_lock<simple_spinlock> l(lock_);
auto m = counters_;
l.unlock();

auto v = tcmalloc_contention_cycles_.Load();
if (v > 0) {
m["tcmalloc_contention_cycles"] = v;
}
return m;
return counters_;
}

inline int64_t TraceMetrics::GetMetric(const char* name) const {
Expand Down

0 comments on commit 4889a91

Please sign in to comment.