Skip to content

Commit

Permalink
TransactionDB Custom Locking API
Browse files Browse the repository at this point in the history
Summary:
Prototype of API to allow MyRocks to override default Mutex/CondVar used by transactions with their own implementations.  They would simply need to pass their own implementations of Mutex/CondVar to the templated TransactionDB::Open().

Default implementation of TransactionDBMutex/TransactionDBCondVar provided (but the code is not currently changed to use this).

Let me know if this API makes sense or if it should be changed

Test Plan: n/a

Reviewers: yhchiang, rven, igor, sdong, spetrunia

Reviewed By: spetrunia

Subscribers: maykov, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D43761
  • Loading branch information
agiardullo committed Sep 9, 2015
1 parent 0ccf2db commit 5e94f68
Show file tree
Hide file tree
Showing 11 changed files with 406 additions and 128 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ set(SOURCES
utilities/transactions/transaction_base.cc
utilities/transactions/transaction_impl.cc
utilities/transactions/transaction_db_impl.cc
utilities/transactions/transaction_db_mutex_impl.cc
utilities/transactions/transaction_lock_mgr.cc
utilities/transactions/transaction_util.cc
utilities/ttl/db_ttl_impl.cc
Expand Down
7 changes: 7 additions & 0 deletions include/rocksdb/utilities/transaction_db.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

namespace rocksdb {

class TransactionDBMutexFactory;

struct TransactionDBOptions {
// Specifies the maximum number of keys that can be locked at the same time
// per column family.
Expand Down Expand Up @@ -58,6 +60,11 @@ struct TransactionDBOptions {
// A negative timeout should only be used if all transactions have an small
// expiration set.
int64_t default_lock_timeout = 1000; // 1 second

// If set, the TransactionDB will use this implementation of a mutex and
// condition variable for all transaction locking instead of the default
// mutex/condvar implementation.
std::shared_ptr<TransactionDBMutexFactory> custom_mutex_factory;
};

struct TransactionOptions {
Expand Down
92 changes: 92 additions & 0 deletions include/rocksdb/utilities/transaction_db_mutex.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Copyright (c) 2015, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

#pragma once
#ifndef ROCKSDB_LITE

#include <memory>

#include "rocksdb/status.h"

namespace rocksdb {

// The TransactionDBMutex and TransactionDBCondVar APIs allow applications to
// implement custom mutexes and condition variables to be used by a
// TransactionDB when locking keys.
//
// To open a TransactionDB with a custom TransactionDBMutexFactory, set
// TransactionDBOptions.custom_mutex_factory.

// Abstract mutex interface. A TransactionDB calls these methods when it needs
// mutual exclusion while locking keys; applications may supply their own
// implementation through a TransactionDBMutexFactory.
class TransactionDBMutex {
 public:
  virtual ~TransactionDBMutex() = default;

  // Acquires the mutex.
  // Returns OK once the mutex is held, or any other Status if it could not be
  // acquired. Every OK result is eventually paired with a call to UnLock()
  // by the TransactionDB.
  virtual Status Lock() = 0;

  // Acquires the mutex, subject to a timeout.
  // When timeout_time is non-negative, the attempt should be failed once that
  // many microseconds have passed.
  // Returns:
  //   OK        - the mutex is now held,
  //   TimedOut  - the timeout elapsed first,
  //   other     - acquisition failed for another reason.
  // Every OK result is eventually paired with a call to UnLock() by the
  // TransactionDB.
  virtual Status TryLockFor(int64_t timeout_time) = 0;

  // Releases a mutex that was successfully acquired through Lock() or
  // TryLockFor().
  virtual void UnLock() = 0;
};

// Abstract condition-variable interface used together with a
// TransactionDBMutex. Applications may supply their own implementation
// through a TransactionDBMutexFactory.
class TransactionDBCondVar {
 public:
  virtual ~TransactionDBCondVar() = default;

  // Blocks the calling thread until this condition variable is signalled via
  // Notify() or NotifyAll(). The caller holds `mutex` when Wait() is invoked.
  // Returns:
  //   OK      - notified (may also be returned spuriously, without any
  //             notification having occurred),
  //   non-OK  - the TransactionDB should stop waiting and fail the operation.
  virtual Status Wait(std::shared_ptr<TransactionDBMutex> mutex) = 0;

  // Blocks the calling thread until this condition variable is signalled via
  // Notify() or NotifyAll(), or until the timeout is reached. The caller
  // holds `mutex` when WaitFor() is invoked.
  //
  // When timeout_time is non-negative, the wait should be failed once that
  // many microseconds have passed. A custom implementation of this class may
  // choose to ignore the timeout.
  //
  // Returns:
  //   OK        - notified (may also be returned spuriously, without any
  //               notification having occurred),
  //   TimedOut  - the timeout was reached,
  //   other     - the TransactionDB should otherwise stop waiting and fail
  //               the operation.
  virtual Status WaitFor(std::shared_ptr<TransactionDBMutex> mutex,
                         int64_t timeout_time) = 0;

  // Wakes at least one of the threads currently waiting on *this, if any.
  virtual void Notify() = 0;

  // Wakes every thread currently waiting on *this.
  virtual void NotifyAll() = 0;
};

// Factory interface through which a TransactionDB obtains the mutexes and
// condition variables it uses for transaction locking.
class TransactionDBMutexFactory {
 public:
  // Returns a newly created TransactionDBMutex.
  virtual std::shared_ptr<TransactionDBMutex> AllocateMutex() = 0;

  // Returns a newly created TransactionDBCondVar.
  virtual std::shared_ptr<TransactionDBCondVar> AllocateCondVar() = 0;

  virtual ~TransactionDBMutexFactory() = default;
};

} // namespace rocksdb

#endif // ROCKSDB_LITE
1 change: 1 addition & 0 deletions src.mk
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ LIB_SOURCES = \
utilities/transactions/optimistic_transaction_db_impl.cc \
utilities/transactions/transaction_base.cc \
utilities/transactions/transaction_db_impl.cc \
utilities/transactions/transaction_db_mutex_impl.cc \
utilities/transactions/transaction_lock_mgr.cc \
utilities/transactions/transaction_impl.cc \
utilities/transactions/transaction_util.cc \
Expand Down
11 changes: 8 additions & 3 deletions utilities/transactions/transaction_db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@

#ifndef ROCKSDB_LITE

#include "utilities/transactions/transaction_db_impl.h"

#include <string>
#include <vector>

#include "utilities/transactions/transaction_db_impl.h"

#include "db/db_impl.h"
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/utilities/transaction_db.h"
#include "utilities/transactions/transaction_db_mutex_impl.h"
#include "utilities/transactions/transaction_impl.h"

namespace rocksdb {
Expand All @@ -22,7 +23,11 @@ TransactionDBImpl::TransactionDBImpl(DB* db,
const TransactionDBOptions& txn_db_options)
: TransactionDB(db),
txn_db_options_(txn_db_options),
lock_mgr_(txn_db_options_.num_stripes, txn_db_options.max_num_locks) {}
lock_mgr_(txn_db_options_.num_stripes, txn_db_options.max_num_locks,
txn_db_options_.custom_mutex_factory
? txn_db_options_.custom_mutex_factory
: std::shared_ptr<TransactionDBMutexFactory>(
new TransactionDBMutexFactoryImpl())) {}

Transaction* TransactionDBImpl::BeginTransaction(
const WriteOptions& write_options, const TransactionOptions& txn_options) {
Expand Down
121 changes: 121 additions & 0 deletions utilities/transactions/transaction_db_mutex_impl.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Copyright (c) 2015, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

#ifndef ROCKSDB_LITE

#include "utilities/transactions/transaction_db_mutex_impl.h"

#include <chrono>
#include <condition_variable>
#include <functional>
#include <mutex>

#include "rocksdb/utilities/transaction_db_mutex.h"

namespace rocksdb {

class TransactionDBMutexImpl : public TransactionDBMutex {
public:
TransactionDBMutexImpl() {}
~TransactionDBMutexImpl() {}

Status Lock() override;

Status TryLockFor(int64_t timeout_time) override;

void UnLock() override { mutex_.unlock(); }

friend class TransactionDBCondVarImpl;

private:
std::timed_mutex mutex_;
};

class TransactionDBCondVarImpl : public TransactionDBCondVar {
public:
TransactionDBCondVarImpl() {}
~TransactionDBCondVarImpl() {}

Status Wait(std::shared_ptr<TransactionDBMutex> mutex) override;

Status WaitFor(std::shared_ptr<TransactionDBMutex> mutex,
int64_t timeout_time) override;

void Notify() override { cv_.notify_one(); }

void NotifyAll() override { cv_.notify_all(); }

private:
std::condition_variable_any cv_;
};

std::shared_ptr<TransactionDBMutex>
TransactionDBMutexFactoryImpl::AllocateMutex() {
return std::shared_ptr<TransactionDBMutex>(new TransactionDBMutexImpl());
}

std::shared_ptr<TransactionDBCondVar>
TransactionDBMutexFactoryImpl::AllocateCondVar() {
return std::shared_ptr<TransactionDBCondVar>(new TransactionDBCondVarImpl());
}

// Blocks until the underlying std::timed_mutex has been acquired.
// This implementation always reports OK.
Status TransactionDBMutexImpl::Lock() {
  mutex_.lock();
  return Status::OK();
}

// Attempts to acquire the mutex, interpreting timeout_time (microseconds) as:
//   < 0  : no timeout, block until the mutex is acquired;
//   == 0 : a single non-blocking attempt;
//   > 0  : keep trying for at most that many microseconds.
// Returns OK when the mutex was acquired, otherwise TimedOut with the
// kMutexTimeout subcode.
Status TransactionDBMutexImpl::TryLockFor(int64_t timeout_time) {
  if (timeout_time < 0) {
    // An unbounded wait cannot time out.
    mutex_.lock();
    return Status::OK();
  }

  bool acquired;
  if (timeout_time == 0) {
    acquired = mutex_.try_lock();
  } else {
    acquired = mutex_.try_lock_for(std::chrono::microseconds(timeout_time));
  }

  if (acquired) {
    return Status::OK();
  }

  // The mutex could not be obtained within the allowed time.
  return Status::TimedOut(Status::SubCode::kMutexTimeout);
}

// Waits on the condition variable with no timeout.
// `mutex` must refer to a TransactionDBMutexImpl, because the wait is
// performed directly on that object's std::timed_mutex.
// This implementation always returns OK once the wait ends.
Status TransactionDBCondVarImpl::Wait(
    std::shared_ptr<TransactionDBMutex> mutex) {
  // static_cast is the correct cast for a base-to-derived conversion;
  // reinterpret_cast performs no pointer adjustment and is not meant for
  // navigating a class hierarchy.
  auto* mutex_impl = static_cast<TransactionDBMutexImpl*>(mutex.get());
  cv_.wait(mutex_impl->mutex_);
  return Status::OK();
}

// Waits on the condition variable, optionally bounded by a timeout.
// `mutex` must refer to a TransactionDBMutexImpl, because the wait is
// performed directly on that object's std::timed_mutex.
// timeout_time is in microseconds; a negative value means wait with no
// timeout.
// Returns TimedOut (subcode kMutexTimeout) when the timed wait reports a
// timeout, and OK otherwise.
Status TransactionDBCondVarImpl::WaitFor(
    std::shared_ptr<TransactionDBMutex> mutex, int64_t timeout_time) {
  // static_cast is the correct cast for a base-to-derived conversion;
  // reinterpret_cast performs no pointer adjustment and is not meant for
  // navigating a class hierarchy.
  auto* mutex_impl = static_cast<TransactionDBMutexImpl*>(mutex.get());

  if (timeout_time < 0) {
    // Negative timeout: plain, unbounded wait.
    cv_.wait(mutex_impl->mutex_);
    return Status::OK();
  }

  const auto duration = std::chrono::microseconds(timeout_time);
  if (cv_.wait_for(mutex_impl->mutex_, duration) == std::cv_status::timeout) {
    // The wait ended because the timeout elapsed.
    return Status::TimedOut(Status::SubCode::kMutexTimeout);
  }

  // CV was signaled, or we spuriously woke up (but didn't time out).
  return Status::OK();
}

} // namespace rocksdb

#endif // ROCKSDB_LITE
26 changes: 26 additions & 0 deletions utilities/transactions/transaction_db_mutex_impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright (c) 2015, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

#pragma once
#ifndef ROCKSDB_LITE

#include "rocksdb/utilities/transaction_db_mutex.h"

namespace rocksdb {

class TransactionDBMutex;
class TransactionDBCondVar;

// Default implementation of TransactionDBMutexFactory. May be overridden
// by TransactionDBOptions.custom_mutex_factory.
// Only declarations live here; both methods are defined in the matching
// .cc file.
class TransactionDBMutexFactoryImpl : public TransactionDBMutexFactory {
public:
// Returns a newly allocated default mutex.
std::shared_ptr<TransactionDBMutex> AllocateMutex() override;
// Returns a newly allocated default condition variable.
std::shared_ptr<TransactionDBCondVar> AllocateCondVar() override;
};

} // namespace rocksdb

#endif // ROCKSDB_LITE
9 changes: 5 additions & 4 deletions utilities/transactions/transaction_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,16 @@ TransactionImpl::TransactionImpl(TransactionDB* txn_db,
txn_db_impl_(nullptr),
txn_id_(GenTxnID()),
expiration_time_(txn_options.expiration >= 0
? start_time_ / 1000 + txn_options.expiration
? start_time_ + txn_options.expiration * 1000
: 0),
lock_timeout_(txn_options.lock_timeout) {
lock_timeout_(txn_options.lock_timeout * 1000) {
txn_db_impl_ = dynamic_cast<TransactionDBImpl*>(txn_db);
assert(txn_db_impl_);

if (lock_timeout_ < 0) {
// Lock timeout not set, use default
lock_timeout_ = txn_db_impl_->GetTxnDBOptions().transaction_lock_timeout;
lock_timeout_ =
txn_db_impl_->GetTxnDBOptions().transaction_lock_timeout * 1000;
}

if (txn_options.set_snapshot) {
Expand All @@ -69,7 +70,7 @@ void TransactionImpl::Cleanup() {

bool TransactionImpl::IsExpired() const {
if (expiration_time_ > 0) {
if (db_->GetEnv()->NowMicros() >= expiration_time_ * 1000) {
if (db_->GetEnv()->NowMicros() >= expiration_time_) {
// Transaction is expired.
return true;
}
Expand Down
10 changes: 6 additions & 4 deletions utilities/transactions/transaction_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,20 @@ class TransactionImpl : public TransactionBaseImpl {

TransactionID GetTxnID() const { return txn_id_; }

// Returns the time (in milliseconds according to Env->GetMicros()*1000)
// Returns the time (in microseconds according to Env->NowMicros())
// that this transaction will be expired. Returns 0 if this transaction does
// not expire.
uint64_t GetExpirationTime() const { return expiration_time_; }

// returns true if this transaction has an expiration_time and has expired.
bool IsExpired() const;

// Returns the number of milliseconds a transaction can wait on acquiring a
// Returns the number of microseconds a transaction can wait on acquiring a
// lock or -1 if there is no timeout.
int64_t GetLockTimeout() const { return lock_timeout_; }
void SetLockTimeout(int64_t timeout) override { lock_timeout_ = timeout; }
void SetLockTimeout(int64_t timeout) override {
lock_timeout_ = timeout * 1000;
}

protected:
Status TryLock(ColumnFamilyHandle* column_family, const Slice& key,
Expand All @@ -76,7 +78,7 @@ class TransactionImpl : public TransactionBaseImpl {
const TransactionID txn_id_;

// If non-zero, this transaction should not be committed after this time (in
// milliseconds)
// microseconds according to Env->NowMicros())
const uint64_t expiration_time_;

// Timeout in microseconds when locking a key or -1 if there is no timeout.
Expand Down
Loading

0 comments on commit 5e94f68

Please sign in to comment.