Skip to content

Commit

Permalink
#69 measure long tail lock tree and cache table stalls
Browse files Browse the repository at this point in the history
  • Loading branch information
prohaska committed Sep 18, 2013
1 parent 2004b5b commit df28359
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 2 deletions.
4 changes: 4 additions & 0 deletions ft/cachetable-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,10 @@ class evictor {
PARTITIONED_COUNTER m_size_leaf;
PARTITIONED_COUNTER m_size_rollback;
PARTITIONED_COUNTER m_size_cachepressure;
PARTITIONED_COUNTER m_wait_pressure_count;
PARTITIONED_COUNTER m_wait_pressure_time;
PARTITIONED_COUNTER m_long_wait_pressure_count;
PARTITIONED_COUNTER m_long_wait_pressure_time;

KIBBUTZ m_kibbutz;

Expand Down
28 changes: 27 additions & 1 deletion ft/cachetable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,11 @@ status_init(void) {
STATUS_INIT(CT_CLEANER_EXECUTIONS, CACHETABLE_CLEANER_EXECUTIONS, UINT64, "cleaner executions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_CLEANER_PERIOD, CACHETABLE_CLEANER_PERIOD, UINT64, "cleaner period", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_CLEANER_ITERATIONS, CACHETABLE_CLEANER_ITERATIONS, UINT64, "cleaner iterations", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);

STATUS_INIT(CT_WAIT_PRESSURE_COUNT, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "number of waits on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_WAIT_PRESSURE_TIME, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "time waiting on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_LONG_WAIT_PRESSURE_COUNT, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "number of long waits on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_LONG_WAIT_PRESSURE_TIME, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "long time waiting on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
ct_status.initialized = true;
}
#undef STATUS_INIT
Expand Down Expand Up @@ -3652,6 +3657,10 @@ void evictor::init(long _size_limit, pair_list* _pl, KIBBUTZ _kibbutz, uint32_t
m_size_leaf = create_partitioned_counter();
m_size_rollback = create_partitioned_counter();
m_size_cachepressure = create_partitioned_counter();
m_wait_pressure_count = create_partitioned_counter();
m_wait_pressure_time = create_partitioned_counter();
m_long_wait_pressure_count = create_partitioned_counter();
m_long_wait_pressure_time = create_partitioned_counter();

m_pl = _pl;
m_kibbutz = _kibbutz;
Expand Down Expand Up @@ -3706,6 +3715,11 @@ void evictor::destroy() {
destroy_partitioned_counter(m_size_cachepressure);
m_size_cachepressure = NULL;

destroy_partitioned_counter(m_wait_pressure_count); m_wait_pressure_count = NULL;
destroy_partitioned_counter(m_wait_pressure_time); m_wait_pressure_time = NULL;
destroy_partitioned_counter(m_long_wait_pressure_count); m_long_wait_pressure_count = NULL;
destroy_partitioned_counter(m_long_wait_pressure_time); m_long_wait_pressure_time = NULL;

toku_cond_destroy(&m_flow_control_cond);
toku_cond_destroy(&m_ev_thread_cond);
toku_mutex_destroy(&m_ev_thread_lock);
Expand Down Expand Up @@ -4173,13 +4187,21 @@ void evictor::decrease_size_evicting(long size_evicting_estimate) {
// size_evicting is number of bytes queued up to be evicted
//
void evictor::wait_for_cache_pressure_to_subside() {
uint64_t t0 = toku_current_time_microsec();
toku_mutex_lock(&m_ev_thread_lock);
m_num_sleepers++;
this->signal_eviction_thread();
toku_cond_wait(&m_flow_control_cond, &m_ev_thread_lock);
m_num_sleepers--;
toku_mutex_unlock(&m_ev_thread_lock);

uint64_t t1 = toku_current_time_microsec();
increment_partitioned_counter(m_wait_pressure_count, 1);
uint64_t tdelta = t1 - t0;
increment_partitioned_counter(m_wait_pressure_time, tdelta);
if (tdelta > 1000000) {
increment_partitioned_counter(m_long_wait_pressure_count, 1);
increment_partitioned_counter(m_long_wait_pressure_time, tdelta);
}
}

//
Expand Down Expand Up @@ -4267,6 +4289,10 @@ void evictor::fill_engine_status() {
STATUS_VALUE(CT_SIZE_LEAF) = read_partitioned_counter(m_size_leaf);
STATUS_VALUE(CT_SIZE_ROLLBACK) = read_partitioned_counter(m_size_rollback);
STATUS_VALUE(CT_SIZE_CACHEPRESSURE) = read_partitioned_counter(m_size_cachepressure);
STATUS_VALUE(CT_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_wait_pressure_count);
STATUS_VALUE(CT_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_wait_pressure_time);
STATUS_VALUE(CT_LONG_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_long_wait_pressure_count);
STATUS_VALUE(CT_LONG_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_long_wait_pressure_time);
}

////////////////////////////////////////////////////////////////////////////////
Expand Down
4 changes: 4 additions & 0 deletions ft/cachetable.h
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,10 @@ typedef enum {
CT_CLEANER_EXECUTIONS, // number of times the cleaner thread's loop has executed
CT_CLEANER_PERIOD,
CT_CLEANER_ITERATIONS, // number of times the cleaner thread runs the cleaner per period
CT_WAIT_PRESSURE_COUNT,
CT_WAIT_PRESSURE_TIME,
CT_LONG_WAIT_PRESSURE_COUNT,
CT_LONG_WAIT_PRESSURE_TIME,
CT_STATUS_NUM_ROWS
} ct_status_entry;

Expand Down
10 changes: 10 additions & 0 deletions locktree/locktree.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ enum {
LTM_LONG_WAIT_COUNT,
LTM_LONG_WAIT_TIME,
LTM_TIMEOUT_COUNT,
LTM_WAIT_ESCALATION_COUNT,
LTM_WAIT_ESCALATION_TIME,
LTM_LONG_WAIT_ESCALATION_COUNT,
LTM_LONG_WAIT_ESCALATION_TIME,
LTM_STATUS_NUM_ROWS // must be last
};

Expand Down Expand Up @@ -306,6 +310,8 @@ class locktree {
void *extra);
int iterate_pending_lock_requests(lock_request_iterate_callback cb, void *extra);

void escalation_wait(uint64_t t);

private:
static const uint64_t DEFAULT_MAX_LOCK_MEMORY = 64L * 1024 * 1024;
static const uint64_t DEFAULT_LOCK_WAIT_TIME = 0;
Expand All @@ -319,6 +325,10 @@ class locktree {
uint64_t m_escalation_count;
tokutime_t m_escalation_time;
uint64_t m_escalation_latest_result;
uint64_t m_wait_escalation_count;
uint64_t m_wait_escalation_time;
uint64_t m_long_wait_escalation_count;
uint64_t m_long_wait_escalation_time;

struct lt_counters m_lt_counters;

Expand Down
27 changes: 26 additions & 1 deletion locktree/manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ void locktree::manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb,
m_current_lock_memory = 0;
m_escalation_count = 0;
m_escalation_time = 0;
m_wait_escalation_count = 0;
m_wait_escalation_time = 0;
m_long_wait_escalation_count = 0;
m_long_wait_escalation_time = 0;
m_escalation_latest_result = 0;
m_lock_wait_time_ms = DEFAULT_LOCK_WAIT_TIME;
m_mem_tracker.set_manager(this);
Expand Down Expand Up @@ -350,13 +354,16 @@ int locktree::manager::memory_tracker::check_current_lock_constraints(void) {
// mutex and check again. if we're still out of locks, run escalation.
// return an error if we're still out of locks after escalation.
if (out_of_locks()) {
uint64_t t0 = toku_current_time_microsec();
m_mgr->mutex_lock();
if (out_of_locks()) {
m_mgr->run_escalation();
if (out_of_locks()) {
r = TOKUDB_OUT_OF_LOCKS;
}
}
uint64_t t1 = toku_current_time_microsec();
m_mgr->escalation_wait(t1 - t0);
m_mgr->mutex_unlock();
}
return r;
Expand Down Expand Up @@ -404,6 +411,15 @@ int locktree::manager::iterate_pending_lock_requests(
return r;
}

void locktree::manager::escalation_wait(uint64_t t) {
m_wait_escalation_count += 1;
m_wait_escalation_time += t;
if (t >= 1000000) {
m_long_wait_escalation_count += 1;
m_long_wait_escalation_time += t;
}
}

#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(status, k, c, t, "locktree: " l, inc)

void locktree::manager::status_init(void) {
Expand All @@ -420,10 +436,15 @@ void locktree::manager::status_init(void) {

STATUS_INIT(LTM_WAIT_COUNT, LOCKTREE_WAIT_COUNT, UINT64, "number of wait locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_WAIT_TIME, LOCKTREE_WAIT_TIME, UINT64, "time waiting for locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_COUNT, LOCKTREE_LONG_WAIT_COUNT, UINT64, "number of long wait locks ", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_COUNT, LOCKTREE_LONG_WAIT_COUNT, UINT64, "number of long wait locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_TIME, LOCKTREE_LONG_WAIT_TIME, UINT64, "long time waiting for locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_TIMEOUT_COUNT, LOCKTREE_TIMEOUT_COUNT, UINT64, "number of lock timeouts", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);

STATUS_INIT(LTM_WAIT_ESCALATION_COUNT, LOCKTREE_WAIT_ESCALATION_COUNT, UINT64, "number of waits on lock escalation", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_WAIT_ESCALATION_TIME, LOCKTREE_WAIT_ESCALATION_TIME, UINT64, "time waiting on lock escalation", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_ESCALATION_COUNT, LOCKTREE_LONG_WAIT_ESCALATION_COUNT, UINT64, "number of long waits on lock escalation", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_ESCALATION_TIME, LOCKTREE_LONG_WAIT_ESCALATION_TIME, UINT64, "long time waiting on lock escalation", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);

status.initialized = true;
}

Expand All @@ -441,6 +462,10 @@ void locktree::manager::get_status(LTM_STATUS statp) {
STATUS_VALUE(LTM_ESCALATION_COUNT) = m_escalation_count;
STATUS_VALUE(LTM_ESCALATION_TIME) = m_escalation_time;
STATUS_VALUE(LTM_ESCALATION_LATEST_RESULT) = m_escalation_latest_result;
STATUS_VALUE(LTM_WAIT_ESCALATION_COUNT) = m_wait_escalation_count;
STATUS_VALUE(LTM_WAIT_ESCALATION_TIME) = m_wait_escalation_time;
STATUS_VALUE(LTM_LONG_WAIT_ESCALATION_COUNT) = m_long_wait_escalation_count;
STATUS_VALUE(LTM_LONG_WAIT_ESCALATION_TIME) = m_long_wait_escalation_time;

mutex_lock();

Expand Down

0 comments on commit df28359

Please sign in to comment.