Skip to content

Commit

Permalink
WriteBatch Save Points
Browse files Browse the repository at this point in the history
Summary:
Support RollbackToSavePoint() in WriteBatch and WriteBatchWithIndex.  Support for partial transaction rollback is needed for MyRocks.

An alternate implementation of Transaction::RollbackToSavePoint() exists in D40869.  However, the other implementation is messier because it is implemented outside of WriteBatch.  This implementation is much cleaner and also exposes a potentially useful feature to WriteBatch.

Test Plan: Added unit tests

Reviewers: IslamAbdelRahman, kradhakrishnan, maykov, yoshinorim, hermanlee4, spetrunia, sdong, yhchiang

Reviewed By: yhchiang

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D42723
  • Loading branch information
agiardullo committed Jul 29, 2015
1 parent 7bfae3a commit 8161bdb
Show file tree
Hide file tree
Showing 10 changed files with 442 additions and 6 deletions.
3 changes: 3 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## Unreleased

### New Features
* RollbackToSavePoint() in WriteBatch/WriteBatchWithIndex

### Public API Changes
* Deprecated WriteOptions::timeout_hint_us. We no longer support write timeout. If you really need this option, talk to us and we might consider returning it.
* Deprecated purge_redundant_kvs_while_flush option.
Expand Down
63 changes: 60 additions & 3 deletions db/write_batch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
// data: uint8[len]

#include "rocksdb/write_batch.h"

#include <stack>
#include <stdexcept>

#include "rocksdb/merge_operator.h"
#include "db/dbformat.h"
#include "db/db_impl.h"
Expand All @@ -32,20 +36,33 @@
#include "db/write_batch_internal.h"
#include "util/coding.h"
#include "util/statistics.h"
#include <stdexcept>
#include "util/perf_context_imp.h"

namespace rocksdb {

// WriteBatch header has an 8-byte sequence number followed by a 4-byte count.
static const size_t kHeader = 12;

WriteBatch::WriteBatch(size_t reserved_bytes) {
// Snapshot of a WriteBatch's serialized state, recorded by SetSavePoint() so
// that RollbackToSavePoint() can later truncate the batch back to it.
struct SavePoint {
  size_t size;  // length of rep_ when the save point was taken
  int count;    // number of entries in rep_ when the save point was taken

  SavePoint(size_t rep_size, int entry_count)
      : size(rep_size), count(entry_count) {}
};

// Holder for the save points recorded on a WriteBatch. WriteBatch keeps a
// pointer to this struct (save_points_) and allocates it on the first call to
// SetSavePoint().
struct SavePoints {
// Most recently set save point is on top.
std::stack<SavePoint> stack;
};

// Creates an empty batch with no save points. `reserved_bytes` is a capacity
// hint for the underlying buffer.
WriteBatch::WriteBatch(size_t reserved_bytes) : save_points_(nullptr) {
  // Never reserve less than the fixed-size header that every batch carries.
  size_t capacity = kHeader;
  if (reserved_bytes > kHeader) {
    capacity = reserved_bytes;
  }
  rep_.reserve(capacity);

  // Clear() sizes rep_ to exactly the header, giving a valid empty batch.
  Clear();
}

WriteBatch::~WriteBatch() { }
// Releases the save point stack, if one was ever allocated.
WriteBatch::~WriteBatch() {
  // Deleting a null pointer is a no-op, so no explicit null check is needed.
  delete save_points_;
}

// Out-of-line definition of the Handler destructor; the body is empty.
WriteBatch::Handler::~Handler() { }

Expand All @@ -61,6 +78,12 @@ bool WriteBatch::Handler::Continue() {
// Resets the batch to the empty state: rep_ is cut back to just the
// kHeader-byte header and every recorded save point is discarded.
void WriteBatch::Clear() {
  rep_.clear();
  rep_.resize(kHeader);

  // No save point was ever set on this batch, so there is nothing to discard.
  if (save_points_ == nullptr) {
    return;
  }

  auto& pending = save_points_->stack;
  while (!pending.empty()) {
    pending.pop();
  }
}

int WriteBatch::Count() const {
Expand Down Expand Up @@ -188,6 +211,8 @@ void WriteBatchInternal::SetSequence(WriteBatch* b, SequenceNumber seq) {
EncodeFixed64(&b->rep_[0], seq);
}

size_t WriteBatchInternal::GetFirstOffset(WriteBatch* b) { return kHeader; }

void WriteBatchInternal::Put(WriteBatch* b, uint32_t column_family_id,
const Slice& key, const Slice& value) {
WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
Expand Down Expand Up @@ -301,6 +326,38 @@ void WriteBatch::PutLogData(const Slice& blob) {
PutLengthPrefixedSlice(&rep_, blob);
}

void WriteBatch::SetSavePoint() {
if (save_points_ == nullptr) {
save_points_ = new SavePoints();
}
// Record length and count of current batch of writes.
save_points_->stack.push(SavePoint(GetDataSize(), Count()));
}

// Discards every entry added since the most recent SetSavePoint() and removes
// that save point from the stack.
//
// Returns Status::NotFound() (leaving the batch untouched) when no save point
// is outstanding, Status::OK() otherwise.
Status WriteBatch::RollbackToSavePoint() {
  const bool has_save_point =
      save_points_ != nullptr && !save_points_->stack.empty();
  if (!has_save_point) {
    return Status::NotFound();
  }

  // Take the most recent save point off the stack.
  const SavePoint target = save_points_->stack.top();
  save_points_->stack.pop();

  // A batch only grows between save points, so the target can never be
  // larger than the current contents.
  assert(target.size <= rep_.size());

  if (target.size != rep_.size()) {
    if (target.size == 0) {
      // Roll back everything. Note that Clear() also empties the save point
      // stack.
      Clear();
    } else {
      // Truncate the serialized entries and restore the matching count in
      // the header.
      rep_.resize(target.size);
      WriteBatchInternal::SetCount(this, target.count);
    }
  }
  // Otherwise nothing was written since the save point: nothing to undo.

  return Status::OK();
}

namespace {
// This class can *only* be used from a single-threaded write thread, because it
// calls ColumnFamilyMemTablesImpl::Seek()
Expand Down
4 changes: 4 additions & 0 deletions db/write_batch_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ class WriteBatchInternal {
// this batch.
static void SetSequence(WriteBatch* batch, SequenceNumber seq);

// Returns the offset of the first entry in the batch.
// This offset is only valid if the batch is not empty.
static size_t GetFirstOffset(WriteBatch* batch);

static Slice Contents(const WriteBatch* batch) {
return Slice(batch->rep_);
}
Expand Down
105 changes: 105 additions & 0 deletions db/write_batch_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,111 @@ TEST_F(WriteBatchTest, ColumnFamiliesBatchWithIndexTest) {
}
#endif // !ROCKSDB_LITE

// Exercises WriteBatch::SetSavePoint() / RollbackToSavePoint(): nested and
// back-to-back save points, rollback when no save point is outstanding, and
// reuse of a batch after Clear().
TEST_F(WriteBatchTest, SavePointTest) {
Status s;
WriteBatch batch;
// Save point #1: taken on the still-empty batch.
batch.SetSavePoint();

batch.Put("A", "a");
batch.Put("B", "b");
// Save point #2: after Put(A) and Put(B).
batch.SetSavePoint();

batch.Put("C", "c");
batch.Delete("A");
// Save points #3 and #4: two save points taken at the same state.
batch.SetSavePoint();
batch.SetSavePoint();

// Rolling back to #4 removes nothing, since no write followed it.
ASSERT_OK(batch.RollbackToSavePoint());
ASSERT_EQ(
"Delete(A)@3"
"Put(A, a)@0"
"Put(B, b)@1"
"Put(C, c)@2",
PrintContents(&batch));

// Roll back to #3 (no-op) and then to #2, which drops Put(C) and Delete(A).
ASSERT_OK(batch.RollbackToSavePoint());
ASSERT_OK(batch.RollbackToSavePoint());
ASSERT_EQ(
"Put(A, a)@0"
"Put(B, b)@1",
PrintContents(&batch));

// Writes added after a rollback are themselves rolled back by the next one.
batch.Delete("A");
batch.Put("B", "bb");

// Rolling back to #1 leaves the batch empty.
ASSERT_OK(batch.RollbackToSavePoint());
ASSERT_EQ("", PrintContents(&batch));

// No save points remain: NotFound is returned and the batch is unchanged.
s = batch.RollbackToSavePoint();
ASSERT_TRUE(s.IsNotFound());
ASSERT_EQ("", PrintContents(&batch));

// The batch remains usable after the save point stack has been exhausted.
batch.Put("D", "d");
batch.Delete("A");

batch.SetSavePoint();

batch.Put("A", "aaa");

// Only the Put issued after the save point is removed.
ASSERT_OK(batch.RollbackToSavePoint());
ASSERT_EQ(
"Delete(A)@1"
"Put(D, d)@0",
PrintContents(&batch));

batch.SetSavePoint();

// Re-issuing the same operations after a save point is still undone by the
// rollback; the earlier copies stay in the batch.
batch.Put("D", "d");
batch.Delete("A");

ASSERT_OK(batch.RollbackToSavePoint());
ASSERT_EQ(
"Delete(A)@1"
"Put(D, d)@0",
PrintContents(&batch));

// With no save point left, a rollback reports NotFound and does not modify a
// non-empty batch.
s = batch.RollbackToSavePoint();
ASSERT_TRUE(s.IsNotFound());
ASSERT_EQ(
"Delete(A)@1"
"Put(D, d)@0",
PrintContents(&batch));

WriteBatch batch2;

// Rollback on a batch that never had a save point set.
s = batch2.RollbackToSavePoint();
ASSERT_TRUE(s.IsNotFound());
ASSERT_EQ("", PrintContents(&batch2));

batch2.Delete("A");
batch2.SetSavePoint();

// Rollback immediately after SetSavePoint() succeeds and changes nothing.
s = batch2.RollbackToSavePoint();
ASSERT_OK(s);
ASSERT_EQ("Delete(A)@0", PrintContents(&batch2));

batch2.Clear();
ASSERT_EQ("", PrintContents(&batch2));

// Save points set after Clear() behave like those on a fresh batch.
batch2.SetSavePoint();

batch2.Delete("B");
ASSERT_EQ("Delete(B)@0", PrintContents(&batch2));

batch2.SetSavePoint();
// Inner save point: nothing was written after it, so Delete(B) stays.
s = batch2.RollbackToSavePoint();
ASSERT_OK(s);
ASSERT_EQ("Delete(B)@0", PrintContents(&batch2));

// Outer save point: taken on the empty batch, so Delete(B) is removed.
s = batch2.RollbackToSavePoint();
ASSERT_OK(s);
ASSERT_EQ("", PrintContents(&batch2));

// Stack exhausted again.
s = batch2.RollbackToSavePoint();
ASSERT_TRUE(s.IsNotFound());
ASSERT_EQ("", PrintContents(&batch2));
}

} // namespace rocksdb

int main(int argc, char** argv) {
Expand Down
16 changes: 16 additions & 0 deletions include/rocksdb/utilities/write_batch_with_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,22 @@ class WriteBatchWithIndex : public WriteBatchBase {
ColumnFamilyHandle* column_family, const Slice& key,
std::string* value);

// Records the state of the batch for future calls to RollbackToSavePoint().
// May be called multiple times to set multiple save points.
void SetSavePoint() override;

// Remove all entries in this batch (Put, Merge, Delete, PutLogData) since the
// most recent call to SetSavePoint() and removes the most recent save point.
// If there is no previous call to SetSavePoint(), Status::NotFound()
// will be returned.
//
// Calling RollbackToSavePoint invalidates any open iterators on this batch.
//
// Returns Status::OK() on success,
// Status::NotFound() if no previous call to SetSavePoint(),
// or other Status on corruption.
Status RollbackToSavePoint() override;

private:
struct Rep;
Rep* rep;
Expand Down
16 changes: 15 additions & 1 deletion include/rocksdb/write_batch.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#ifndef STORAGE_ROCKSDB_INCLUDE_WRITE_BATCH_H_
#define STORAGE_ROCKSDB_INCLUDE_WRITE_BATCH_H_

#include <stack>
#include <string>
#include <stdint.h>
#include "rocksdb/status.h"
Expand All @@ -34,6 +35,7 @@ namespace rocksdb {

class Slice;
class ColumnFamilyHandle;
struct SavePoints;
struct SliceParts;

class WriteBatch : public WriteBatchBase {
Expand Down Expand Up @@ -101,6 +103,17 @@ class WriteBatch : public WriteBatchBase {
// Clear all updates buffered in this batch.
void Clear() override;

// Records the state of the batch for future calls to RollbackToSavePoint().
// May be called multiple times to set multiple save points.
void SetSavePoint() override;

// Remove all entries in this batch (Put, Merge, Delete, PutLogData) since the
// most recent call to SetSavePoint() and removes the most recent save point.
// If there is no previous call to SetSavePoint(), Status::NotFound()
// will be returned.
// Otherwise returns Status::OK().
Status RollbackToSavePoint() override;

// Support for iterating over the contents of a batch.
class Handler {
public:
Expand Down Expand Up @@ -168,10 +181,11 @@ class WriteBatch : public WriteBatchBase {
WriteBatch* GetWriteBatch() override { return this; }

// Constructor with a serialized string object
explicit WriteBatch(std::string rep): rep_(rep) {}
// Starts with no save points; rep_ is initialized from a copy of `rep`.
explicit WriteBatch(std::string rep) : save_points_(nullptr), rep_(rep) {}

private:
friend class WriteBatchInternal;
SavePoints* save_points_;

protected:
std::string rep_; // See comment in write_batch.cc for the format of rep_
Expand Down
11 changes: 11 additions & 0 deletions include/rocksdb/write_batch_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
namespace rocksdb {

class Slice;
class Status;
class ColumnFamilyHandle;
class WriteBatch;
struct SliceParts;
Expand Down Expand Up @@ -72,6 +73,16 @@ class WriteBatchBase {
// converting any WriteBatchBase(eg WriteBatchWithIndex) into a basic
// WriteBatch.
virtual WriteBatch* GetWriteBatch() = 0;

// Records the state of the batch for future calls to RollbackToSavePoint().
// May be called multiple times to set multiple save points.
virtual void SetSavePoint() = 0;

// Remove all entries in this batch (Put, Merge, Delete, PutLogData) since the
// most recent call to SetSavePoint() and removes the most recent save point.
// If there is no previous call to SetSavePoint(), Status::NotFound()
// will be returned.
virtual Status RollbackToSavePoint() = 0;
};

} // namespace rocksdb
Loading

0 comments on commit 8161bdb

Please sign in to comment.