Skip to content

Commit

Permalink
Limit maximum memory used in the WriteBatch representation
Browse files Browse the repository at this point in the history
Summary:
Extend TransactionOptions to include max_write_batch_size, which determines the maximum size of the WriteBatch representation. If the memory limit is exceeded, the operation will abort with subcode kMemoryLimit.
Closes facebook#2124

Differential Revision: D4861842

Pulled By: lth

fbshipit-source-id: 46fd172ea67cc90bbba829bf0d70cfab2261c161
  • Loading branch information
lth authored and facebook-github-bot committed Apr 10, 2017
1 parent 97ec8a1 commit 1f8b119
Show file tree
Hide file tree
Showing 16 changed files with 504 additions and 287 deletions.
173 changes: 106 additions & 67 deletions db/write_batch.cc

Large diffs are not rendered by default.

55 changes: 28 additions & 27 deletions db/write_batch_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,86 +8,87 @@
#include <string>

#include "rocksdb/slice.h"
#include "rocksdb/status.h"

namespace rocksdb {

// Simple implementation of SliceParts variants of Put(). Child classes
// can override these methods with more performant solutions if they choose.
void WriteBatchBase::Put(ColumnFamilyHandle* column_family,
const SliceParts& key, const SliceParts& value) {
Status WriteBatchBase::Put(ColumnFamilyHandle* column_family,
const SliceParts& key, const SliceParts& value) {
std::string key_buf, value_buf;
Slice key_slice(key, &key_buf);
Slice value_slice(value, &value_buf);

Put(column_family, key_slice, value_slice);
return Put(column_family, key_slice, value_slice);
}

void WriteBatchBase::Put(const SliceParts& key, const SliceParts& value) {
Status WriteBatchBase::Put(const SliceParts& key, const SliceParts& value) {
std::string key_buf, value_buf;
Slice key_slice(key, &key_buf);
Slice value_slice(value, &value_buf);

Put(key_slice, value_slice);
return Put(key_slice, value_slice);
}

void WriteBatchBase::Delete(ColumnFamilyHandle* column_family,
const SliceParts& key) {
Status WriteBatchBase::Delete(ColumnFamilyHandle* column_family,
const SliceParts& key) {
std::string key_buf;
Slice key_slice(key, &key_buf);
Delete(column_family, key_slice);
return Delete(column_family, key_slice);
}

void WriteBatchBase::Delete(const SliceParts& key) {
Status WriteBatchBase::Delete(const SliceParts& key) {
std::string key_buf;
Slice key_slice(key, &key_buf);
Delete(key_slice);
return Delete(key_slice);
}

void WriteBatchBase::SingleDelete(ColumnFamilyHandle* column_family,
const SliceParts& key) {
Status WriteBatchBase::SingleDelete(ColumnFamilyHandle* column_family,
const SliceParts& key) {
std::string key_buf;
Slice key_slice(key, &key_buf);
SingleDelete(column_family, key_slice);
return SingleDelete(column_family, key_slice);
}

void WriteBatchBase::SingleDelete(const SliceParts& key) {
Status WriteBatchBase::SingleDelete(const SliceParts& key) {
std::string key_buf;
Slice key_slice(key, &key_buf);
SingleDelete(key_slice);
return SingleDelete(key_slice);
}

void WriteBatchBase::DeleteRange(ColumnFamilyHandle* column_family,
const SliceParts& begin_key,
const SliceParts& end_key) {
Status WriteBatchBase::DeleteRange(ColumnFamilyHandle* column_family,
const SliceParts& begin_key,
const SliceParts& end_key) {
std::string begin_key_buf, end_key_buf;
Slice begin_key_slice(begin_key, &begin_key_buf);
Slice end_key_slice(end_key, &end_key_buf);
DeleteRange(column_family, begin_key_slice, end_key_slice);
return DeleteRange(column_family, begin_key_slice, end_key_slice);
}

void WriteBatchBase::DeleteRange(const SliceParts& begin_key,
const SliceParts& end_key) {
Status WriteBatchBase::DeleteRange(const SliceParts& begin_key,
const SliceParts& end_key) {
std::string begin_key_buf, end_key_buf;
Slice begin_key_slice(begin_key, &begin_key_buf);
Slice end_key_slice(end_key, &end_key_buf);
DeleteRange(begin_key_slice, end_key_slice);
return DeleteRange(begin_key_slice, end_key_slice);
}

void WriteBatchBase::Merge(ColumnFamilyHandle* column_family,
const SliceParts& key, const SliceParts& value) {
Status WriteBatchBase::Merge(ColumnFamilyHandle* column_family,
const SliceParts& key, const SliceParts& value) {
std::string key_buf, value_buf;
Slice key_slice(key, &key_buf);
Slice value_slice(value, &value_buf);

Merge(column_family, key_slice, value_slice);
return Merge(column_family, key_slice, value_slice);
}

void WriteBatchBase::Merge(const SliceParts& key, const SliceParts& value) {
Status WriteBatchBase::Merge(const SliceParts& key, const SliceParts& value) {
std::string key_buf, value_buf;
Slice key_slice(key, &key_buf);
Slice value_slice(value, &value_buf);

Merge(key_slice, value_slice);
return Merge(key_slice, value_slice);
}

} // namespace rocksdb
95 changes: 67 additions & 28 deletions db/write_batch_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,44 +68,44 @@ class WriteBatchInternal {
static const size_t kHeader = 12;

// WriteBatch methods with column_family_id instead of ColumnFamilyHandle*
static void Put(WriteBatch* batch, uint32_t column_family_id,
const Slice& key, const Slice& value);
static Status Put(WriteBatch* batch, uint32_t column_family_id,
const Slice& key, const Slice& value);

static void Put(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key, const SliceParts& value);
static Status Put(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key, const SliceParts& value);

static void Delete(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key);
static Status Delete(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key);

static void Delete(WriteBatch* batch, uint32_t column_family_id,
const Slice& key);
static Status Delete(WriteBatch* batch, uint32_t column_family_id,
const Slice& key);

static void SingleDelete(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key);
static Status SingleDelete(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key);

static void SingleDelete(WriteBatch* batch, uint32_t column_family_id,
const Slice& key);
static Status SingleDelete(WriteBatch* batch, uint32_t column_family_id,
const Slice& key);

static void DeleteRange(WriteBatch* b, uint32_t column_family_id,
const Slice& begin_key, const Slice& end_key);
static Status DeleteRange(WriteBatch* b, uint32_t column_family_id,
const Slice& begin_key, const Slice& end_key);

static void DeleteRange(WriteBatch* b, uint32_t column_family_id,
const SliceParts& begin_key,
const SliceParts& end_key);
static Status DeleteRange(WriteBatch* b, uint32_t column_family_id,
const SliceParts& begin_key,
const SliceParts& end_key);

static void Merge(WriteBatch* batch, uint32_t column_family_id,
const Slice& key, const Slice& value);
static Status Merge(WriteBatch* batch, uint32_t column_family_id,
const Slice& key, const Slice& value);

static void Merge(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key, const SliceParts& value);
static Status Merge(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key, const SliceParts& value);

static void MarkEndPrepare(WriteBatch* batch, const Slice& xid);
static Status MarkEndPrepare(WriteBatch* batch, const Slice& xid);

static void MarkRollback(WriteBatch* batch, const Slice& xid);
static Status MarkRollback(WriteBatch* batch, const Slice& xid);

static void MarkCommit(WriteBatch* batch, const Slice& xid);
static Status MarkCommit(WriteBatch* batch, const Slice& xid);

static void InsertNoop(WriteBatch* batch);
static Status InsertNoop(WriteBatch* batch);

// Return the number of entries in the batch.
static int Count(const WriteBatch* batch);
Expand All @@ -132,7 +132,7 @@ class WriteBatchInternal {
return batch->rep_.size();
}

static void SetContents(WriteBatch* batch, const Slice& contents);
static Status SetContents(WriteBatch* batch, const Slice& contents);

// Inserts batches[i] into memtable, for i in 0..num_batches-1 inclusive.
//
Expand Down Expand Up @@ -177,12 +177,51 @@ class WriteBatchInternal {
uint64_t log_number = 0, DB* db = nullptr,
bool concurrent_memtable_writes = false);

static void Append(WriteBatch* dst, const WriteBatch* src,
const bool WAL_only = false);
static Status Append(WriteBatch* dst, const WriteBatch* src,
const bool WAL_only = false);

// Returns the byte size of appending a WriteBatch with ByteSize
// leftByteSize and a WriteBatch with ByteSize rightByteSize
static size_t AppendedByteSize(size_t leftByteSize, size_t rightByteSize);
};

// LocalSavePoint works much like a scope guard around a single mutation of a
// WriteBatch. On construction it records the batch's current data size, entry
// count and content flags. After the mutation the caller invokes commit(),
// which either accepts the new contents or, if the batch has a non-zero
// max_bytes_ and its representation has grown past it, restores the recorded
// state and reports Status::MemoryLimit().
//
// In builds without NDEBUG the destructor asserts that commit() was called.
class LocalSavePoint {
 public:
  // Snapshots the size, count and content flags of `batch`.
  explicit LocalSavePoint(WriteBatch* batch)
      : batch_(batch),
        savepoint_(batch->GetDataSize(), batch->Count(),
                   batch->content_flags_.load(std::memory_order_relaxed))
#ifndef NDEBUG
        ,
        committed_(false)
#endif
  {
  }

#ifndef NDEBUG
  // Catches code paths that forget to call commit().
  ~LocalSavePoint() { assert(committed_); }
#endif

  // Returns Status::OK() when the batch has no size limit (max_bytes_ is zero)
  // or its representation is within that limit. Otherwise rolls the batch back
  // to the snapshot taken at construction and returns Status::MemoryLimit().
  Status commit() {
#ifndef NDEBUG
    committed_ = true;
#endif
    const bool within_limit =
        !batch_->max_bytes_ || batch_->rep_.size() <= batch_->max_bytes_;
    if (within_limit) {
      return Status::OK();
    }

    // Over the limit: drop everything written since the snapshot.
    batch_->rep_.resize(savepoint_.size);
    WriteBatchInternal::SetCount(batch_, savepoint_.count);
    batch_->content_flags_.store(savepoint_.content_flags,
                                 std::memory_order_relaxed);
    return Status::MemoryLimit();
  }

 private:
  WriteBatch* batch_;    // batch being guarded
  SavePoint savepoint_;  // state captured at construction
#ifndef NDEBUG
  bool committed_;  // set by commit(); checked in the destructor
#endif
};

} // namespace rocksdb
12 changes: 12 additions & 0 deletions db/write_batch_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,18 @@ TEST_F(WriteBatchTest, SavePointTest) {
ASSERT_EQ("", PrintContents(&batch2));
}

TEST_F(WriteBatchTest, MemoryLimitTest) {
  // Size budget: the batch header is 12 bytes and each of the two Puts below
  // takes 8 bytes, so 12 + 8 * 2 = 28 bytes is exactly enough for both.
  WriteBatch batch(0, 28);

  // Both of these fit within the limit.
  ASSERT_OK(batch.Put("a", "...."));
  ASSERT_OK(batch.Put("b", "...."));

  // A third Put would push the batch past 28 bytes and must be rejected.
  const Status over_limit = batch.Put("c", "....");
  ASSERT_TRUE(over_limit.IsMemoryLimit());
}

} // namespace rocksdb

int main(int argc, char** argv) {
Expand Down
13 changes: 13 additions & 0 deletions include/rocksdb/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class Status {
kNoSpace = 4,
kDeadlock = 5,
kStaleFile = 6,
kMemoryLimit = 7,
kMaxSubCode
};

Expand Down Expand Up @@ -166,6 +167,11 @@ class Status {
return Status(kIOError, kNoSpace, msg, msg2);
}

// Builds a Status with code kAborted and subcode kMemoryLimit.
static Status MemoryLimit() { return Status(kAborted, kMemoryLimit); }
// Builds a Status with code kAborted and subcode kMemoryLimit, passing msg
// and msg2 (an empty Slice by default) on to the Status constructor.
static Status MemoryLimit(const Slice& msg, const Slice& msg2 = Slice()) {
return Status(kAborted, kMemoryLimit, msg, msg2);
}

// Returns true iff the status indicates success.
bool ok() const { return code() == kOk; }

Expand Down Expand Up @@ -224,6 +230,13 @@ class Status {
return (code() == kIOError) && (subcode() == kNoSpace);
}

// Returns true iff this status reports a memory limit error, i.e. its code is
// kAborted and its subcode is kMemoryLimit. Some operations cap the memory
// they may use (e.g. the size of a write batch) in order to avoid out of
// memory exceptions.
bool IsMemoryLimit() const {
  if (code() != kAborted) {
    return false;
  }
  return subcode() == kMemoryLimit;
}

// Return a string representation of this status suitable for printing.
// Returns the string "OK" for success.
std::string ToString() const;
Expand Down
3 changes: 3 additions & 0 deletions include/rocksdb/utilities/transaction_db.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ struct TransactionOptions {

// The number of traversals to make during deadlock detection.
int64_t deadlock_detect_depth = 50;

// The maximum number of bytes used for the write batch. 0 means no limit.
size_t max_write_batch_size = 0;
};

struct KeyLockInfo {
Expand Down
37 changes: 21 additions & 16 deletions include/rocksdb/utilities/write_batch_with_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,42 +88,45 @@ class WriteBatchWithIndex : public WriteBatchBase {
// interface, or we can't find a column family from the column family handle
// passed in, backup_index_comparator will be used for the column family.
// reserved_bytes: reserved bytes in underlying WriteBatch
// max_bytes: maximum size of underlying WriteBatch in bytes
// overwrite_key: if true, overwrite the key in the index when inserting
// the same key as previously, so iterator will never
// show two entries with the same key.
explicit WriteBatchWithIndex(
const Comparator* backup_index_comparator = BytewiseComparator(),
size_t reserved_bytes = 0, bool overwrite_key = false);
size_t reserved_bytes = 0, bool overwrite_key = false,
size_t max_bytes = 0);

virtual ~WriteBatchWithIndex();

using WriteBatchBase::Put;
void Put(ColumnFamilyHandle* column_family, const Slice& key,
const Slice& value) override;
Status Put(ColumnFamilyHandle* column_family, const Slice& key,
const Slice& value) override;

void Put(const Slice& key, const Slice& value) override;
Status Put(const Slice& key, const Slice& value) override;

using WriteBatchBase::Merge;
void Merge(ColumnFamilyHandle* column_family, const Slice& key,
const Slice& value) override;
Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
const Slice& value) override;

void Merge(const Slice& key, const Slice& value) override;
Status Merge(const Slice& key, const Slice& value) override;

using WriteBatchBase::Delete;
void Delete(ColumnFamilyHandle* column_family, const Slice& key) override;
void Delete(const Slice& key) override;
Status Delete(ColumnFamilyHandle* column_family, const Slice& key) override;
Status Delete(const Slice& key) override;

using WriteBatchBase::SingleDelete;
void SingleDelete(ColumnFamilyHandle* column_family,
const Slice& key) override;
void SingleDelete(const Slice& key) override;
Status SingleDelete(ColumnFamilyHandle* column_family,
const Slice& key) override;
Status SingleDelete(const Slice& key) override;

using WriteBatchBase::DeleteRange;
void DeleteRange(ColumnFamilyHandle* column_family, const Slice& begin_key,
const Slice& end_key) override;
void DeleteRange(const Slice& begin_key, const Slice& end_key) override;
Status DeleteRange(ColumnFamilyHandle* column_family, const Slice& begin_key,
const Slice& end_key) override;
Status DeleteRange(const Slice& begin_key, const Slice& end_key) override;

using WriteBatchBase::PutLogData;
void PutLogData(const Slice& blob) override;
Status PutLogData(const Slice& blob) override;

using WriteBatchBase::Clear;
void Clear() override;
Expand Down Expand Up @@ -204,6 +207,8 @@ class WriteBatchWithIndex : public WriteBatchBase {
// or other Status on corruption.
Status RollbackToSavePoint() override;

void SetMaxBytes(size_t max_bytes) override;

private:
struct Rep;
std::unique_ptr<Rep> rep;
Expand Down
Loading

0 comments on commit 1f8b119

Please sign in to comment.